1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
|
.\" Hey, Emacs, edit this file in -*- nroff-fill -*- mode
.\"-
.\" Copyright (c) 1997, 1998, 2003
.\" Nan Yang Computer Services Limited. All rights reserved.
.\"
.\" This software is distributed under the so-called ``Berkeley
.\" License'':
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. All advertising materials mentioning features or use of this software
.\" must display the following acknowledgement:
.\" This product includes software developed by Nan Yang Computer
.\" Services Limited.
.\" 4. Neither the name of the Company nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" This software is provided ``as is'', and any express or implied
.\" warranties, including, but not limited to, the implied warranties of
.\" merchantability and fitness for a particular purpose are disclaimed.
.\" In no event shall the company or contributors be liable for any
.\" direct, indirect, incidental, special, exemplary, or consequential
.\" damages (including, but not limited to, procurement of substitute
.\" goods or services; loss of use, data, or profits; or business
.\" interruption) however caused and on any theory of liability, whether
.\" in contract, strict liability, or tort (including negligence or
.\" otherwise) arising in any way out of the use of this software, even if
.\" advised of the possibility of such damage.
.\"
.\" $FreeBSD$
.\"
.Dd May 16, 2002
.Dt VINUM 4
.Os
.Sh NAME
.Nm vinum
.Nd Logical Volume Manager
.Sh SYNOPSIS
.Cd "device vinum"
.Sh DESCRIPTION
.Nm
is a logical volume manager inspired by, but not derived from, the Veritas
Volume Manager.
It provides the following features:
.Bl -bullet
.It
It provides device-independent logical disks, called
.Em volumes .
Volumes are
not restricted to the size of any disk on the system.
.It
The volumes consist of one or more
.Em plexes ,
each of which contains the
entire address space of a volume.
This represents an implementation of RAID-1
(mirroring).
Multiple plexes can also be used for:
.\" XXX What about sparse plexes? Do we want them?
.Bl -bullet
.It
Increased read throughput.
.Nm
will read data from the least active disk, so if a volume has plexes on multiple
disks, more data can be read in parallel.
.Nm
reads data from only one plex, but it writes data to all plexes.
.It
Increased reliability.
By storing plexes on different disks, data will remain
available even if one of the plexes becomes unavailable.
In comparison with a
RAID-5 plex (see below), using multiple plexes requires more storage space, but
gives better performance, particularly in the case of a drive failure.
.It
Additional plexes can be used for on-line data reorganization.
By attaching an
additional plex and subsequently detaching one of the older plexes, data can be
moved on-line without compromising access.
.It
An additional plex can be used to obtain a consistent dump of a file system.
By
attaching an additional plex and detaching at a specific time, the detached plex
becomes an accurate snapshot of the file system at the time of detachment.
.\" Make sure to flush!
.El
.It
Each plex consists of one or more logical disk slices, called
.Em subdisks .
A subdisk is defined as a contiguous block of physical disk storage.
A plex may
consist of any reasonable number of subdisks (in other words, the real limit is
not the number, but other factors, such as memory and performance, associated
with maintaining a large number of subdisks).
.It
A number of mappings between subdisks and plexes are available:
.Bl -bullet
.It
.Em "Concatenated plexes"
consist of one or more subdisks, each of which
is mapped to a contiguous part of the plex address space.
.It
.Em "Striped plexes"
consist of two or more subdisks of equal size.
The file
address space is mapped in
.Em stripes ,
integral fractions of the subdisk
size.
Consecutive plex address space is mapped to stripes in each subdisk in
turn.
.if t \{\
.ig
.\" FIXME
.br
.ne 1.5i
.PS
move right 2i
down
SD0: box
SD1: box
SD2: box
"plex 0" at SD0.n+(0,.2)
"subdisk 0" rjust at SD0.w-(.2,0)
"subdisk 1" rjust at SD1.w-(.2,0)
"subdisk 2" rjust at SD2.w-(.2,0)
.PE
..
.\}
The subdisks of a striped plex must all be the same size.
.It
.Em "RAID-5 plexes"
require at least three equal-sized subdisks.
They
resemble striped plexes, except that in each stripe, one subdisk stores parity
information.
This subdisk changes in each stripe: in the first stripe, it is the
first subdisk, in the second it is the second subdisk, etc.
In the event of a
single disk failure,
.Nm
will recover the data based on the information stored on the remaining subdisks.
This mapping is particularly suited to read-intensive access.
The subdisks of a
RAID-5 plex must all be the same size.
.\" Make sure to flush!
.El
.It
.Em Drives
are the lowest level of the storage hierarchy.
They represent disk special
devices.
.It
.Nm
offers automatic startup.
Unlike
.Ux
file systems,
.Nm
volumes contain all the configuration information needed to ensure that they are
started correctly when the subsystem is enabled.
This is also a significant
advantage over the Veritas\(tm File System.
This feature concerns only the presence
of the volumes.
It does not mean that the volumes will be mounted
automatically, since the standard startup procedures with
.Pa /etc/fstab
perform this function.
.El
.Sh KERNEL CONFIGURATION
.Nm
is currently supplied as a KLD module, and does not require
configuration.
As with other KLDs, it is absolutely necessary to match the KLD
to the version of the operating system.
Failure to do so will cause
.Nm
to issue an error message and terminate.
.Pp
It is possible to configure
.Nm
in the kernel, but this is not recommended.
To do so, add this line to the
kernel configuration file:
.Pp
.D1 Cd "device vinum"
.Ss Debug Options
The current version of
.Nm ,
both the kernel module and the user program
.Xr vinum 8 ,
includes significant debugging support.
It is not recommended to remove
this support at the moment, but if you do you must remove it from both the
kernel and the user components.
To do this, edit the files
.Pa /usr/src/sbin/vinum/Makefile
and
.Pa /usr/src/sys/modules/vinum/Makefile
and edit the
.Va CFLAGS
variable to remove the
.Li -DVINUMDEBUG
option.
If you have
configured
.Nm
into the kernel, either specify the line
.Pp
.D1 Cd "options VINUMDEBUG"
.Pp
in the kernel configuration file or remove the
.Li -DVINUMDEBUG
option from
.Pa /usr/src/sbin/vinum/Makefile
as described above.
.Pp
If the
.Va VINUMDEBUG
variables do not match,
.Xr vinum 8
will fail with a message
explaining the problem and what to do to correct it.
.Ss Other Options
.Cd "options VINUM_AUTOSTART"
.Pp
Make
.Nm
automatically scan all available disks at attach time.
This method is deprecated; it is primarily intended for environments
that do not want to rely on kernel environment variables set by
.Xr loader 8 .
.Pp
.Nm
was previously available in two versions: a freely available version which did
not contain RAID-5 functionality, and a full version including RAID-5
functionality, which was available only from Cybernet Systems Inc.
The present
version of
.Nm
includes the RAID-5 functionality.
.Sh RUNNING VINUM
.Nm
is part of the base
.Fx
system.
It does not require installation.
To start it, start the
.Xr vinum 8
program, which will load the KLD if it is not already present.
Before using
.Nm ,
it must be configured.
See
.Xr vinum 8
for information on how to create a
.Nm
configuration.
.Pp
Normally, you start a configured version of
.Nm
at boot time.
Set the variable
.Va start_vinum
in
.Pa /etc/rc.conf
to
.Dq Li YES
to start
.Nm
at boot time.
(See
.Xr rc.conf 5
for more details.)
.Pp
If
.Nm
is loaded as a KLD (the recommended way), the
.Nm vinum Cm stop
command will unload it
(see
.Xr vinum 8 ) .
You can also do this with the
.Xr kldunload 8
command.
.Pp
The KLD can only be unloaded when idle, in other words when no volumes are
mounted and no other instances of the
.Xr vinum 8
program are active.
Unloading the KLD does not harm the data in the volumes.
.Ss Configuring and Starting Objects
Use the
.Xr vinum 8
utility to configure and start
.Nm
objects.
.Sh AUTOMATIC STARTUP
The
.Nm
subsystem can be automatically started at attach time.
There are two kernel environment variables that can be set in
.Xr loader.conf 5
to accomplish this.
.Bl -tag -width ".Va vinum.autostart" -offset indent
.It Va vinum.autostart
If this variable is set (to any value), the attach function will attempt
to scan all available disks for valid
.Nm
configuration records.
This is the preferred way if automatic startup is desired.
.Pp
Example:
.Dl vinum.autostart="YES"
.It Va vinum.drives
Alternatively, this variable can enumerate a list of disk devices
to scan for configuration records.
Note that only the
.Dq bare
device names need to be given, since
.Nm
will automatically scan all possible slices and partitions.
.Pp
Example:
.Dl vinum.drives="da0 da1"
.El
.Pp
If automatic startup is used, it is not necessary to set the
.Va start_vinum
variable of
.Xr rc.conf 5 .
Note that if
.Nm
is to supply the volume for the root file system, it is necessary
to start the subsystem early.
This can be achieved by specifying
.Pp
.Dl vinum_load="YES"
.Pp
in
.Xr loader.conf 5 .
.Sh IOCTL CALLS
.Xr ioctl 2
calls are intended for the use of the
.Xr vinum 8
configuration program only.
They are described in the header file
.Pa /sys/dev/vinum/vinumio.h .
.Ss Disk Labels
Conventional disk special devices have a
.Em "disk label"
in the second sector of the device.
See
.Xr disklabel 5
for more details.
This disk label describes the layout of the partitions within
the device.
.Nm
does not subdivide volumes, so volumes do not contain a physical disk label.
For convenience,
.Nm
implements the ioctl calls
.Dv DIOCGDINFO
(get disk label),
.Dv DIOCGPART
(get partition information),
.Dv DIOCWDINFO
(write disk label) and
.Dv DIOCSDINFO
(set disk label).
.Dv DIOCGDINFO
and
.Dv DIOCGPART
refer to an internal
representation of the disk label which is not present on the volume.
As a
result, the
.Fl r
option of
.Xr disklabel 8 ,
which reads the
.Dq "raw disk" ,
will fail.
.Pp
In general,
.Xr disklabel 8
serves no useful purpose on a
.Nm
volume.
If you run it, it will show you
three partitions,
.Ql a ,
.Ql b
and
.Ql c ,
all the same except for the
.Va fstype ,
for example:
.Bd -literal
3 partitions:
# size offset fstype [fsize bsize bps/cpg]
a: 2048 0 4.2BSD 1024 8192 0 # (Cyl. 0 - 0)
b: 2048 0 swap # (Cyl. 0 - 0)
c: 2048 0 unused 0 0 # (Cyl. 0 - 0)
.Ed
.Pp
.Nm
ignores the
.Dv DIOCWDINFO
and
.Dv DIOCSDINFO
ioctls, since there is nothing to change.
As a result, any attempt to modify the disk label will be silently ignored.
.Sh MAKING FILE SYSTEMS
Since
.Nm
volumes do not contain partitions, the names do not need to conform to the
standard rules for naming disk partitions.
For a physical disk partition, the
last letter of the device name specifies the partition identifier (a to h).
.Nm
volumes need not conform to this convention, but if they do not,
.Xr newfs 8
will complain that it cannot determine the partition.
To solve this problem,
use the
.Fl v
flag to
.Xr newfs 8 .
For example, if you have a volume
.Pa concat ,
use the following command to create a UFS file system on it:
.Pp
.Dl "newfs -v /dev/vinum/concat"
.Sh OBJECT NAMING
.Nm
assigns default names to plexes and subdisks, although they may be overridden.
We do not recommend overriding the default names.
Experience with the
Veritas\(tm
volume manager, which allows arbitrary naming of objects, has shown that this
flexibility does not bring a significant advantage, and it can cause confusion.
.Pp
Names may contain any non-blank character, but it is recommended to restrict
them to letters, digits and the underscore character.
The names of volumes,
plexes and subdisks may be up to 64 characters long, and the names of drives may
be up to 32 characters long.
When choosing volume and plex names, bear in mind
that automatically generated plex and subdisk names are longer than the name
from which they are derived.
.Bl -bullet
.It
When
.Nm
creates or deletes objects, it creates a directory
.Pa /dev/vinum ,
in which it makes device entries for each volume it finds.
It also creates
subdirectories,
.Pa /dev/vinum/plex
and
.Pa /dev/vinum/sd ,
in which it stores device entries for plexes and subdisks.
In addition, it creates two more directories,
.Pa /dev/vinum/vol
and
.Pa /dev/vinum/drive ,
in which it stores hierarchical information for volumes and drives.
.It
In addition,
.Nm
creates three super-devices,
.Pa /dev/vinum/control ,
.Pa /dev/vinum/Control
and
.Pa /dev/vinum/controld .
.Pa /dev/vinum/control
is used by
.Xr vinum 8
when it has been compiled without the
.Dv VINUMDEBUG
option,
.Pa /dev/vinum/Control
is used by
.Xr vinum 8
when it has been compiled with the
.Dv VINUMDEBUG
option, and
.Pa /dev/vinum/controld
is used by the
.Nm
daemon.
The two control devices for
.Xr vinum 8
are used to synchronize the debug status of kernel and user modules.
.It
Unlike
.Ux
drives,
.Nm
volumes are not subdivided into partitions, and thus do not contain a disk
label.
Unfortunately, this confuses a number of utilities, notably
.Xr newfs 8 ,
which normally tries to interpret the last letter of a
.Nm
volume name as a partition identifier.
If you use a volume name which does not
end in the letters
.Ql a
to
.Ql c ,
you must use the
.Fl v
flag to
.Xr newfs 8
in order to tell it to ignore this convention.
.\"
.It
Plexes do not need to be assigned explicit names.
By default, a plex name is
the name of the volume followed by the letters
.Pa .p
and the number of the
plex.
For example, the plexes of volume
.Pa vol3
are called
.Pa vol3.p0 , vol3.p1
and so on.
These names can be overridden, but it is not recommended.
.It
Like plexes, subdisks are assigned names automatically, and explicit naming is
discouraged.
A subdisk name is the name of the plex followed by the letters
.Pa .s
and a number identifying the subdisk.
For example, the subdisks of
plex
.Pa vol3.p0
are called
.Pa vol3.p0.s0 , vol3.p0.s1
and so on.
.It
By contrast,
.Em drives
must be named.
This makes it possible to move a drive to a different location
and still recognize it automatically.
Drive names may be up to 32 characters
long.
.El
.Ss Example
Assume the
.Nm
objects described in the section
.Sx "CONFIGURATION FILE"
in
.Xr vinum 8 .
The directory
.Pa /dev/vinum
looks like:
.Bd -literal -offset indent
# ls -lR /dev/vinum
total 5
brwxr-xr-- 1 root wheel 25, 2 Mar 30 16:08 concat
brwx------ 1 root wheel 25, 0x40000000 Mar 30 16:08 control
brwx------ 1 root wheel 25, 0x40000001 Mar 30 16:08 controld
drwxrwxrwx 2 root wheel 512 Mar 30 16:08 drive
drwxrwxrwx 2 root wheel 512 Mar 30 16:08 plex
drwxrwxrwx 2 root wheel 512 Mar 30 16:08 rvol
drwxrwxrwx 2 root wheel 512 Mar 30 16:08 sd
brwxr-xr-- 1 root wheel 25, 3 Mar 30 16:08 strcon
brwxr-xr-- 1 root wheel 25, 1 Mar 30 16:08 stripe
brwxr-xr-- 1 root wheel 25, 0 Mar 30 16:08 tinyvol
drwxrwxrwx 7 root wheel 512 Mar 30 16:08 vol
brwxr-xr-- 1 root wheel 25, 4 Mar 30 16:08 vol5
/dev/vinum/drive:
total 0
brw-r----- 1 root operator 4, 15 Oct 21 16:51 drive2
brw-r----- 1 root operator 4, 31 Oct 21 16:51 drive4
/dev/vinum/plex:
total 0
brwxr-xr-- 1 root wheel 25, 0x10000002 Mar 30 16:08 concat.p0
brwxr-xr-- 1 root wheel 25, 0x10010002 Mar 30 16:08 concat.p1
brwxr-xr-- 1 root wheel 25, 0x10000003 Mar 30 16:08 strcon.p0
brwxr-xr-- 1 root wheel 25, 0x10010003 Mar 30 16:08 strcon.p1
brwxr-xr-- 1 root wheel 25, 0x10000001 Mar 30 16:08 stripe.p0
brwxr-xr-- 1 root wheel 25, 0x10000000 Mar 30 16:08 tinyvol.p0
brwxr-xr-- 1 root wheel 25, 0x10000004 Mar 30 16:08 vol5.p0
brwxr-xr-- 1 root wheel 25, 0x10010004 Mar 30 16:08 vol5.p1
/dev/vinum/sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20000002 Mar 30 16:08 concat.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100002 Mar 30 16:08 concat.p0.s1
brwxr-xr-- 1 root wheel 25, 0x20010002 Mar 30 16:08 concat.p1.s0
brwxr-xr-- 1 root wheel 25, 0x20000003 Mar 30 16:08 strcon.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100003 Mar 30 16:08 strcon.p0.s1
brwxr-xr-- 1 root wheel 25, 0x20010003 Mar 30 16:08 strcon.p1.s0
brwxr-xr-- 1 root wheel 25, 0x20110003 Mar 30 16:08 strcon.p1.s1
brwxr-xr-- 1 root wheel 25, 0x20000001 Mar 30 16:08 stripe.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100001 Mar 30 16:08 stripe.p0.s1
brwxr-xr-- 1 root wheel 25, 0x20000000 Mar 30 16:08 tinyvol.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100000 Mar 30 16:08 tinyvol.p0.s1
brwxr-xr-- 1 root wheel 25, 0x20000004 Mar 30 16:08 vol5.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100004 Mar 30 16:08 vol5.p0.s1
brwxr-xr-- 1 root wheel 25, 0x20010004 Mar 30 16:08 vol5.p1.s0
brwxr-xr-- 1 root wheel 25, 0x20110004 Mar 30 16:08 vol5.p1.s1
/dev/vinum/vol:
total 5
brwxr-xr-- 1 root wheel 25, 2 Mar 30 16:08 concat
drwxr-xr-x 4 root wheel 512 Mar 30 16:08 concat.plex
brwxr-xr-- 1 root wheel 25, 3 Mar 30 16:08 strcon
drwxr-xr-x 4 root wheel 512 Mar 30 16:08 strcon.plex
brwxr-xr-- 1 root wheel 25, 1 Mar 30 16:08 stripe
drwxr-xr-x 3 root wheel 512 Mar 30 16:08 stripe.plex
brwxr-xr-- 1 root wheel 25, 0 Mar 30 16:08 tinyvol
drwxr-xr-x 3 root wheel 512 Mar 30 16:08 tinyvol.plex
brwxr-xr-- 1 root wheel 25, 4 Mar 30 16:08 vol5
drwxr-xr-x 4 root wheel 512 Mar 30 16:08 vol5.plex
/dev/vinum/vol/concat.plex:
total 2
brwxr-xr-- 1 root wheel 25, 0x10000002 Mar 30 16:08 concat.p0
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 concat.p0.sd
brwxr-xr-- 1 root wheel 25, 0x10010002 Mar 30 16:08 concat.p1
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 concat.p1.sd
/dev/vinum/vol/concat.plex/concat.p0.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20000002 Mar 30 16:08 concat.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100002 Mar 30 16:08 concat.p0.s1
/dev/vinum/vol/concat.plex/concat.p1.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20010002 Mar 30 16:08 concat.p1.s0
/dev/vinum/vol/strcon.plex:
total 2
brwxr-xr-- 1 root wheel 25, 0x10000003 Mar 30 16:08 strcon.p0
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 strcon.p0.sd
brwxr-xr-- 1 root wheel 25, 0x10010003 Mar 30 16:08 strcon.p1
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 strcon.p1.sd
/dev/vinum/vol/strcon.plex/strcon.p0.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20000003 Mar 30 16:08 strcon.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100003 Mar 30 16:08 strcon.p0.s1
/dev/vinum/vol/strcon.plex/strcon.p1.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20010003 Mar 30 16:08 strcon.p1.s0
brwxr-xr-- 1 root wheel 25, 0x20110003 Mar 30 16:08 strcon.p1.s1
/dev/vinum/vol/stripe.plex:
total 1
brwxr-xr-- 1 root wheel 25, 0x10000001 Mar 30 16:08 stripe.p0
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 stripe.p0.sd
/dev/vinum/vol/stripe.plex/stripe.p0.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20000001 Mar 30 16:08 stripe.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100001 Mar 30 16:08 stripe.p0.s1
/dev/vinum/vol/tinyvol.plex:
total 1
brwxr-xr-- 1 root wheel 25, 0x10000000 Mar 30 16:08 tinyvol.p0
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 tinyvol.p0.sd
/dev/vinum/vol/tinyvol.plex/tinyvol.p0.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20000000 Mar 30 16:08 tinyvol.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100000 Mar 30 16:08 tinyvol.p0.s1
/dev/vinum/vol/vol5.plex:
total 2
brwxr-xr-- 1 root wheel 25, 0x10000004 Mar 30 16:08 vol5.p0
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 vol5.p0.sd
brwxr-xr-- 1 root wheel 25, 0x10010004 Mar 30 16:08 vol5.p1
drwxr-xr-x 2 root wheel 512 Mar 30 16:08 vol5.p1.sd
/dev/vinum/vol/vol5.plex/vol5.p0.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20000004 Mar 30 16:08 vol5.p0.s0
brwxr-xr-- 1 root wheel 25, 0x20100004 Mar 30 16:08 vol5.p0.s1
/dev/vinum/vol/vol5.plex/vol5.p1.sd:
total 0
brwxr-xr-- 1 root wheel 25, 0x20010004 Mar 30 16:08 vol5.p1.s0
brwxr-xr-- 1 root wheel 25, 0x20110004 Mar 30 16:08 vol5.p1.s1
.Ed
.Pp
In the case of unattached plexes and subdisks, the naming is reversed.
Subdisks
are named after the disk on which they are located, and plexes are named after
the subdisk.
.\" XXX
.Bf -symbolic
This mapping is still to be determined.
.Ef
.Ss Object States
Each
.Nm
object has a
.Em state
associated with it.
.Nm
uses this state to determine the handling of the object.
.Ss Volume States
Volumes may have the following states:
.Bl -hang -width 14n
.It Em down
The volume is completely inaccessible.
.It Em up
The volume is up and at least partially functional.
Not all plexes may be
available.
.El
.Ss "Plex States"
Plexes may have the following states:
.Bl -hang -width 14n
.It Em referenced
A plex entry which has been referenced as part of a volume, but which is
currently not known.
.It Em faulty
A plex which has gone completely down because of I/O errors.
.It Em down
A plex which has been taken down by the administrator.
.It Em initializing
A plex which is being initialized.
.El
.Pp
The remaining states represent plexes which are at least partially up.
.Bl -hang -width 14n
.It Em corrupt
A plex entry which is at least partially up.
Not all subdisks are available,
and an inconsistency has occurred.
If no other plex is uncorrupted, the volume
is no longer consistent.
.It Em degraded
A RAID-5 plex entry which is accessible, but one subdisk is down, requiring
recovery for many I/O requests.
.It Em flaky
A plex which is really up, but which has a reborn subdisk which we do not
completely trust, and which we do not want to read if we can avoid it.
.It Em up
A plex entry which is completely up.
All subdisks are up.
.El
.Ss "Subdisk States"
Subdisks can have the following states:
.Bl -hang -width 14n
.It Em empty
A subdisk entry which has been created completely.
All fields are correct, and
the disk has been updated, but the data on the disk is not valid.
.It Em referenced
A subdisk entry which has been referenced as part of a plex, but which is
currently not known.
.It Em initializing
A subdisk entry which has been created completely and which is currently being
initialized.
.El
.Pp
The following states represent invalid data.
.Bl -hang -width 14n
.It Em obsolete
A subdisk entry which has been created completely.
All fields are correct, the
config on disk has been updated, and the data was valid, but since then the
drive has been taken down, and as a result updates have been missed.
.It Em stale
A subdisk entry which has been created completely.
All fields are correct, the
disk has been updated, and the data was valid, but since then the drive has
crashed and updates have been lost.
.El
.Pp
The following states represent valid, inaccessible data.
.Bl -hang -width 14n
.It Em crashed
A subdisk entry which has been created completely.
All fields are correct, the
disk has been updated, and the data was valid, but since then the drive has gone
down.
No attempt has been made to write to the subdisk since the crash, so the
data is valid.
.It Em down
A subdisk entry which was up, which contained valid data, and which was taken
down by the administrator.
The data is valid.
.It Em reviving
The subdisk is currently in the process of being revived.
We can write but not
read.
.El
.Pp
The following states represent accessible subdisks with valid data.
.Bl -hang -width 14n
.It Em reborn
A subdisk entry which has been created completely.
All fields are correct, the
disk has been updated, and the data was valid, but since then the drive has gone
down and up again.
No updates were lost, but it is possible that the subdisk
has been damaged.
We will not read from this subdisk if we have a choice.
If this
is the only subdisk which covers this address space in the plex, we set its
state to up under these circumstances, so this status implies that there is
another subdisk to fulfill the request.
.It Em up
A subdisk entry which has been created completely.
All fields are correct, the
disk has been updated, and the data is valid.
.El
.Ss "Drive States"
Drives can have the following states:
.Bl -hang -width 14n
.It Em referenced
At least one subdisk refers to the drive, but it is not currently accessible to
the system.
No device name is known.
.It Em down
The drive is not accessible.
.It Em up
The drive is up and running.
.El
.Sh BUGS
.Nm
is a new product.
Bugs can be expected.
The configuration mechanism is not yet
fully functional.
If you have difficulties, please look at the section
.Sx "DEBUGGING PROBLEMS WITH VINUM"
before reporting problems.
.Pp
Kernels with the
.Nm
device appear to work, but are not supported.
If you have trouble with
this configuration, please first replace the kernel with a
.No non- Ns Nm
kernel and test with the KLD module.
.Pp
Detection of differences between the version of the kernel and the KLD is not
yet implemented.
.Pp
The RAID-5 functionality is new in
.Fx 3.3 .
Some problems have been
reported with
.Nm
in combination with soft updates, but these are not reproducible on all
systems.
If you are planning to use
.Nm
in a production environment, please test carefully.
.Sh DEBUGGING PROBLEMS WITH VINUM
Solving problems with
.Nm
can be a difficult affair.
This section suggests some approaches.
.Ss Configuration problems
It is relatively easy (too easy) to run into problems with the
.Nm
configuration.
If you do, the first thing you should do is stop configuration
updates:
.Pp
.Dl "vinum setdaemon 4"
.Pp
This will stop updates and any further corruption of the on-disk configuration.
.Pp
Next, look at the on-disk configuration, using a Bourne-style shell:
.Bd -literal
rm -f log
for i in /dev/da0s1h /dev/da1s1h /dev/da2s1h /dev/da3s1h; do
(dd if=$i skip=8 count=6|tr -d '\e000-\e011\e200-\e377'; echo) >> log
done
.Ed
.Pp
The names of the devices are the names of all
.Nm
slices.
The file
.Pa log
should then contain something like this:
.Bd -literal
.if t .ps -3
.if t .vs -3
IN VINOpanic.lemis.comdrive1}6E7~^K6T^Yfoovolume obj state up
volume src state up
volume raid state down
volume r state down
volume foo state up
plex name obj.p0 state corrupt org concat vol obj
plex name obj.p1 state corrupt org striped 128b vol obj
plex name src.p0 state corrupt org striped 128b vol src
plex name src.p1 state up org concat vol src
plex name raid.p0 state faulty org disorg vol raid
plex name r.p0 state faulty org disorg vol r
plex name foo.p0 state up org concat vol foo
plex name foo.p1 state faulty org concat vol foo
sd name obj.p0.s0 drive drive2 plex obj.p0 state reborn len 409600b driveoffset 265b plexoffset 0b
sd name obj.p0.s1 drive drive4 plex obj.p0 state up len 409600b driveoffset 265b plexoffset 409600b
sd name obj.p1.s0 drive drive1 plex obj.p1 state up len 204800b driveoffset 265b plexoffset 0b
sd name obj.p1.s1 drive drive2 plex obj.p1 state reborn len 204800b driveoffset 409865b plexoffset 128b
sd name obj.p1.s2 drive drive3 plex obj.p1 state up len 204800b driveoffset 265b plexoffset 256b
sd name obj.p1.s3 drive drive4 plex obj.p1 state up len 204800b driveoffset 409865b plexoffset 384b
.if t .vs
.if t .ps
.Ed
.Pp
The first line contains the
.Nm
label and must start with the text
.Dq Li "IN VINO" .
It also contains the name of the system.
The exact definition is contained in
.Pa /usr/src/sys/dev/vinum/vinumvar.h .
The saved configuration starts in the middle of the line with the text
.Dq Li "volume obj state up"
and starts in sector 9 of the disk.
The rest of the output shows the remainder of the on-disk configuration.
It
may be necessary to increase the
.Cm count
argument of
.Xr dd 1
in order to see the complete configuration.
.Pp
The configuration on all disks should be the same.
If this is not the case,
please report the problem with the exact contents of the file
.Pa log .
There is probably little that can be done to recover the on-disk configuration,
but if you keep a copy of the files used to create the objects, you should be
able to re-create them.
The
.Ic create
command does not change the subdisk data, so this will not cause data
corruption.
You may need to use the
.Ic resetconfig
command if you have this kind of trouble.
.Ss Kernel Panics
In order to analyse a panic which you suspect comes from
.Nm
you will need to build a debug kernel.
See the online handbook at
.Pa /usr/share/doc/en/books/developers-handbook/kerneldebug.html
(if installed) or
.Pa http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/developers-\%handbook/kerneldebug.html
for more details of how to do this.
.Pp
Perform the following steps to analyse a
.Nm
problem:
.Bl -enum
.It
Copy the files
.Pa /usr/src/sys/modules/vinum/.gdbinit.crash ,
.Pa /usr/src/sys/modules/vinum/.gdbinit.kernel ,
.Pa /usr/src/sys/modules/vinum/.gdbinit.serial ,
.Pa /usr/src/sys/modules/vinum/.gdbinit.vinum
and
.Pa /usr/src/sys/modules/vinum/.gdbinit.vinum.paths
to the directory in which you will be performing the analysis, typically
.Pa /var/crash .
.It
Make sure that you build the
.Nm
module with debugging information.
The standard
.Pa Makefile
builds a module with debugging symbols by default.
If the version of
.Nm
in
.Pa /boot/kernel
does not contain symbols, you will not get an error message, but the stack trace
will not show the symbols.
Check the module before starting
.Xr gdb 1 :
.Bd -literal
$ file /boot/kernel/vinum.ko
/boot/kernel/vinum.ko: ELF 32-bit LSB shared object, Intel 80386,
version 1 (FreeBSD), not stripped
.Ed
.Pp
If the output shows that
.Pa /boot/kernel/vinum.ko
is stripped, you will have to find a version which is not.
Usually this will be
either in
.Pa /usr/obj/sys/modules/vinum/vinum.ko
(if you have built
.Nm
with a
.Dq Li "make world" )
or
.Pa /usr/src/sys/modules/vinum/vinum.ko
(if you have built
.Nm
in this directory).
Modify the file
.Pa .gdbinit.vinum.paths
accordingly.
.It
Either take a dump or use remote serial
.Xr gdb 1
to analyse the problem.
To analyse a dump, say
.Pa /var/crash/vmcore.5 ,
link
.Pa /var/crash/.gdbinit.crash
to
.Pa /var/crash/.gdbinit
and enter:
.Bd -literal -offset indent
cd /var/crash
gdb -k kernel.debug vmcore.5
.Ed
.Pp
This example assumes that you have installed the correct debug kernel at
.Pa /var/crash/kernel.debug .
If not, substitute the correct name of the debug kernel.
.Pp
To perform remote serial debugging,
link
.Pa /var/crash/.gdbinit.serial
to
.Pa /var/crash/.gdbinit
and enter:
.Bd -literal -offset indent
cd /var/crash
gdb -k kernel.debug
.Ed
.Pp
In this case, the
.Pa .gdbinit
file performs the functions necessary to establish the connection.
The remote
machine must already be in debug mode: enter the kernel debugger and select
.Ic gdb
(see
.Xr ddb 4
for more details).
The serial
.Pa .gdbinit
file expects the serial connection to run at 38400 bits per second; if you run
at a different speed, edit the file accordingly (look for the
.Va remotebaud
specification).
.Pp
The following example shows a remote debugging session using the
.Ic debug
command of
.Xr vinum 8 :
.Bd -literal
.if t .ps -3
.if t .vs -3
GDB 4.16 (i386-unknown-freebsd), Copyright 1996 Free Software Foundation, Inc.
Debugger (msg=0xf1093174 "vinum debug") at ../../i386/i386/db_interface.c:318
318 in_Debugger = 0;
#1 0xf108d9bc in vinumioctl (dev=0x40001900, cmd=0xc008464b, data=0xf6dedee0 "",
flag=0x3, p=0xf68b7940) at
/usr/src/sys/modules/Vinum/../../dev/Vinum/vinumioctl.c:102
102 Debugger ("vinum debug");
(kgdb) bt
#0 Debugger (msg=0xf0f661ac "vinum debug") at ../../i386/i386/db_interface.c:318
#1 0xf0f60a7c in vinumioctl (dev=0x40001900, cmd=0xc008464b, data=0xf6923ed0 "",
flag=0x3, p=0xf688e6c0) at
/usr/src/sys/modules/vinum/../../dev/vinum/vinumioctl.c:109
#2 0xf01833b7 in spec_ioctl (ap=0xf6923e0c) at ../../miscfs/specfs/spec_vnops.c:424
#3 0xf0182cc9 in spec_vnoperate (ap=0xf6923e0c) at ../../miscfs/specfs/spec_vnops.c:129
#4 0xf01eb3c1 in ufs_vnoperatespec (ap=0xf6923e0c) at ../../ufs/ufs/ufs_vnops.c:2312
#5 0xf017dbb1 in vn_ioctl (fp=0xf1007ec0, com=0xc008464b, data=0xf6923ed0 "",
p=0xf688e6c0) at vnode_if.h:395
#6 0xf015dce0 in ioctl (p=0xf688e6c0, uap=0xf6923f84) at ../../kern/sys_generic.c:473
#7 0xf0214c0b in syscall (frame={tf_es = 0x27, tf_ds = 0x27, tf_edi = 0xefbfcff8,
tf_esi = 0x1, tf_ebp = 0xefbfcf90, tf_isp = 0xf6923fd4, tf_ebx = 0x2,
tf_edx = 0x804b614, tf_ecx = 0x8085d10, tf_eax = 0x36, tf_trapno = 0x7,
tf_err = 0x2, tf_eip = 0x8060a34, tf_cs = 0x1f, tf_eflags = 0x286,
tf_esp = 0xefbfcf78, tf_ss = 0x27}) at ../../i386/i386/trap.c:1100
#8 0xf020a1fc in Xint0x80_syscall ()
#9 0x804832d in ?? ()
#10 0x80482ad in ?? ()
#11 0x80480e9 in ?? ()
.if t .vs
.if t .ps
.Ed
.Pp
When entering from the debugger, it is important that the source of frame 1
(listed by the
.Pa .gdbinit
file at the top of the example) contains the text
.Dq Li "Debugger (\*[q]vinum debug\*[q]);" .
.Pp
This is an indication that the address specifications are correct.
If you get
some other output, your symbols and the kernel module are out of sync, and the
trace will be meaningless.
.El
.Pp
For an initial investigation, the most important information is the output of
the
.Ic bt
(backtrace) command above.
.Ss Reporting Problems with Vinum
If you find any bugs in
.Nm ,
please report them to
.An Greg Lehey Aq grog@lemis.com .
Supply the following
information:
.Bl -bullet
.It
The output of the
.Nm vinum Cm list
command
(see
.Xr vinum 8 ) .
.It
Any messages printed in
.Pa /var/log/messages .
All such messages will be identified by the text
.Dq Li vinum
at the beginning.
.It
If you have a panic, a stack trace as described above.
.El
.Sh AUTHORS
.An Greg Lehey Aq grog@lemis.com .
.Sh HISTORY
.Nm
first appeared in
.Fx 3.0 .
The RAID-5 component of
.Nm
was developed by Cybernet Inc.\&
.Pq Pa http://www.cybernet.com/ ,
for its NetMAX product.
.Sh SEE ALSO
.Xr disklabel 5 ,
.Xr loader.conf 5 ,
.Xr disklabel 8 ,
.Xr loader 8 ,
.Xr newfs 8 ,
.Xr vinum 8