diff options
Diffstat (limited to 'sys/dev/vinum/vinumvar.h')
-rw-r--r-- | sys/dev/vinum/vinumvar.h | 516 |
1 files changed, 516 insertions, 0 deletions
diff --git a/sys/dev/vinum/vinumvar.h b/sys/dev/vinum/vinumvar.h new file mode 100644 index 0000000..a7e7e2f --- /dev/null +++ b/sys/dev/vinum/vinumvar.h @@ -0,0 +1,516 @@ +/*- + * Copyright (c) 1997, 1998 + * Nan Yang Computer Services Limited. All rights reserved. + * + * This software is distributed under the so-called ``Berkeley + * License'': + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Nan Yang Computer + * Services Limited. + * 4. Neither the name of the Company nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * This software is provided ``as is'', and any express or implied + * warranties, including, but not limited to, the implied warranties of + * merchantability and fitness for a particular purpose are disclaimed. + * In no event shall the company or contributors be liable for any + * direct, indirect, incidental, special, exemplary, or consequential + * damages (including, but not limited to, procurement of substitute + * goods or services; loss of use, data, or profits; or business + * interruption) however caused and on any theory of liability, whether + * in contract, strict liability, or tort (including negligence or + * otherwise) arising in any way out of the use of this software, even if + * advised of the possibility of such damage. + * + * $Id: vinumvar.h,v 1.3 1998/11/02 04:11:16 grog Exp $ + */ + +/* XXX gdb can't find our global pointers, so use this kludge to + * point to them locally. Remove after testing */ +#define BROKEN_GDB struct _vinum_conf *VC = &vinum_conf + +#include <sys/time.h> +#include "vinumstate.h" +/* Some configuration maxima. They're an enum because + * we can't define global constants. Sorry about that. + * + * These aren't as bad as they look: most of them are soft limits. + */ + +enum constants { + VINUM_HEADER = 512, /* size of header on disk */ + MAXCONFIGLINE = 1024, /* maximum size of a single config line */ + /* XXX Do we still need this? */ + MINVINUMSLICE = 1048576, /* minimum size of a slice */ + + CDEV_MAJOR = 91, /* major number for character device */ + BDEV_MAJOR = 25, /* and block device */ + + ROUND_ROBIN_READPOL = -1, /* round robin read policy */ + + /* type field in minor number */ + VINUM_VOLUME_TYPE = 0, + VINUM_PLEX_TYPE = 1, + VINUM_SD_TYPE = 2, + VINUM_DRIVE_TYPE = 3, + VINUM_SUPERDEV_TYPE = 4, /* super device. */ + + /* Shifts for the individual fields in the device */ + VINUM_TYPE_SHIFT = 28, + VINUM_VOL_SHIFT = 0, + VINUM_PLEX_SHIFT = 16, + VINUM_SD_SHIFT = 20, + VINUM_VOL_WIDTH = 8, + VINUM_PLEX_WIDTH = 3, + VINUM_SD_WIDTH = 8, + MAJORDEV_SHIFT = 8, + + MAXPLEX = 8, /* maximum number of plexes in a volume */ + MAXSD = 256, /* maximum number of subdisks in a plex */ + MAXDRIVENAME = 32, /* maximum length of a device name */ + MAXSDNAME = 64, /* maximum length of a subdisk name */ + MAXPLEXNAME = 64, /* maximum length of a plex name */ + MAXVOLNAME = 64, /* maximum length of a volume name */ + MAXNAME = 64, /* maximum length of any name */ + + +/* Create a block device number */ +#define VINUMBDEV(v,p,s,t) ((BDEV_MAJOR << MAJORDEV_SHIFT) \ + | (v << VINUM_VOL_SHIFT) \ + | (p << VINUM_PLEX_SHIFT) \ + | (s << VINUM_SD_SHIFT) \ + | (t << VINUM_TYPE_SHIFT) ) + +/* And a character device number */ +#define VINUMCDEV(v,p,s,t) ((CDEV_MAJOR << MAJORDEV_SHIFT) \ + | (v << VINUM_VOL_SHIFT) \ + | (p << VINUM_PLEX_SHIFT) \ + | (s << VINUM_SD_SHIFT) \ + | (t << VINUM_TYPE_SHIFT) ) + +/* extract device type */ +#define DEVTYPE(x) ((x >> VINUM_TYPE_SHIFT) & 7) + +/* extract volume number */ +#define VOLNO(x) (x & ((1 << VINUM_VOL_WIDTH) - 1)) + +/* extract plex number */ +#define PLEXNO(x) (VOL [VOLNO (x)].plex [(x >> VINUM_PLEX_SHIFT) & ((1 << VINUM_PLEX_WIDTH) - 1)]) + +/* extract subdisk number */ +#define SDNO(x) (PLEX [PLEXNO (x)].sdnos [(x >> VINUM_SD_SHIFT) & ((1 << VINUM_SD_WIDTH) - 1)]) + +/* extract drive number */ +#define DRIVENO(x) (SD [SDNO (x)].driveno) + + VINUM_SUPERDEV = VINUMBDEV(0, 0, 0, VINUM_SUPERDEV_TYPE), /* superdevice number */ + +/* the number of object entries to cater for initially, and also the + * value by which they are incremented. It doesn't take long + * to extend them, so theoretically we could start with 1 of each, but + * it's untidy to allocate such small areas. These values are + * probably too small. + */ + + INITIAL_DRIVES = 4, + INITIAL_VOLUMES = 4, + INITIAL_PLEXES = 8, + INITIAL_SUBDISKS = 16, + INITIAL_SUBDISKS_IN_PLEX = 4, /* number of subdisks to allocate to a plex */ + INITIAL_SUBDISKS_IN_DRIVE = 4, /* number of subdisks to allocate to a drive */ + INITIAL_DRIVE_FREELIST = 16, /* number of entries in drive freelist */ + PLEX_REGION_TABLE_SIZE = 8, /* number of entries in plex region tables */ + INITIAL_LOCKS = 8, /* number of locks to allocate to a volume */ + DEFAULT_REVIVE_BLOCKSIZE = 32768, /* size of block to transfer in one op */ + VINUMHOSTNAMELEN = 32, /* host name field in label */ +}; + +/* device numbers */ + +/* + * 31 30 28 27 20 19 18 16 15 8 7 0 + * |-----------------------------------------------------------------------------------------------| + * |X | Type | Subdisk number | X| Plex | Major number | volume number | + * |-----------------------------------------------------------------------------------------------| + * + * 0x2 03 1 19 06 + */ +struct devcode { +/* CARE. These fields assume a big-endian word. On a + * little-endian system, they're the wrong way around */ + unsigned volume:8; /* up to 256 volumes */ + unsigned major:8; /* this is where the major number fits */ + unsigned plex:3; /* up to 8 plexes per volume */ + unsigned unused:1; /* up for grabs */ + unsigned sd:8; /* up to 256 subdisks per plex */ + unsigned type:3; /* type of object */ + /* type field + VINUM_VOLUME = 0, + VINUM_PLEX = 1, + VINUM_SUBDISK = 2, + VINUM_DRIVE = 3, + VINUM_SUPERDEV = 4, */ + unsigned signbit:1; /* to make 32 bits */ +}; + +#define VINUM_DIR "/dev/vinum" +#define VINUM_RDIR "/dev/rvinum" +#define VINUM_SUPERDEV_NAME VINUM_DIR"/control" + +/* Flags for all objects. Most of them only apply to + * specific objects, but we have space for all in any + * 32 bit flags word. */ +enum objflags { + VF_LOCKED = 1, /* somebody has locked access to this object */ + VF_LOCKING = 2, /* we want access to this object */ + VF_WRITETHROUGH = 8, /* volume: write through */ + VF_INITED = 0x10, /* unit has been initialized */ + VF_WLABEL = 0x20, /* label area is writable */ + VF_LABELLING = 0x40, /* unit is currently being labelled */ + VF_WANTED = 0x80, /* someone is waiting to obtain a lock */ + VF_RAW = 0x100, /* raw volume (no file system) */ + VF_LOADED = 0x200, /* module is loaded */ + VF_CONFIGURING = 0x400, /* somebody is changing the config */ + VF_WILL_CONFIGURE = 0x800, /* somebody wants to change the config */ + VF_CONFIG_INCOMPLETE = 0x1000, /* haven't finished changing the config */ + VF_CONFIG_SETUPSTATE = 0x2000, /* set a volume up if all plexes are empty */ + VF_READING_CONFIG = 0x4000, /* we're reading config database from disk */ + VF_KERNELOP = 0x8000, /* we're performing ops from kernel space */ + VF_DIRTYCONFIG = 0x10000, /* config needs updating */ +}; + +/* Global configuration information for the vinum subsystem */ +struct _vinum_conf { + /* Pointers to vinum structures */ + struct drive *drive; + struct sd *sd; + struct plex *plex; + struct volume *volume; + + /* the number allocated */ + int drives_allocated; + int subdisks_allocated; + int plexes_allocated; + int volumes_allocated; + + /* and the number currently in use */ + int drives_used; + int subdisks_used; + int plexes_used; + int volumes_used; + + int flags; + int opencount; /* number of times we've been opened */ +#if DEBUG + int lastrq; + struct buf *lastbuf; + struct rqinfo **rqipp; + struct rqinfo *rqinfop; +#endif +}; + +/* Use these defines to simplify code */ +#define DRIVE vinum_conf.drive +#define SD vinum_conf.sd +#define PLEX vinum_conf.plex +#define VOL vinum_conf.volume +#define VFLAGS vinum_conf.flags + +/* Slice header + + * Vinum drives start with this structure: + * + *\ Sector + * |--------------------------------------| + * | PDP-11 memorial boot block | 0 + * |--------------------------------------| + * | Disk label, maybe | 1 + * |--------------------------------------| + * | Slice definition (vinum_hdr) | 2 + * |--------------------------------------| + * | | + * | Configuration info, first copy | 3 + * | | + * |--------------------------------------| + * | | + * | Configuration info, second copy | 3 + size of config + * | | + * |--------------------------------------| + */ + +/* Sizes and offsets of our information */ +enum { + VINUM_LABEL_OFFSET = 4096, /* offset of vinum label */ + VINUMHEADERLEN = 512, /* size of vinum label */ + VINUM_CONFIG_OFFSET = 4608, /* offset of first config copy */ + MAXCONFIG = 65536, /* and size of config copy */ + DATASTART = (MAXCONFIG * 2 + VINUM_CONFIG_OFFSET) / DEV_BSIZE /* this is where the data starts */ +}; + +/* hostname is 256 bytes long, but we don't need to shlep + * multiple copies in vinum. We use the host name just + * to identify this system, and 32 bytes should be ample + * for that purpose */ + +struct vinum_label { + char sysname[VINUMHOSTNAMELEN]; /* system name at time of creation */ + char name[MAXDRIVENAME]; /* our name of the drive */ + struct timeval date_of_birth; /* the time it was created */ + struct timeval last_update; /* and the time of last update */ + off_t drive_size; /* total size in bytes of the drive. + * This value includes the headers */ +}; + +struct vinum_hdr { + long long magic; /* we're long on magic numbers */ + /* XXX Get these right for big-endian */ +#define VINUM_MAGIC 22322600044678729LL /* should be this */ +#define VINUM_NOMAGIC 22322600044678990LL /* becomes this after obliteration */ + int config_length; /* size in bytes of each copy of the + * configuration info. + * This must be a multiple of the sector size. */ + + struct vinum_label label; /* unique label */ +}; + +/* Information returned from read_drive_label */ +enum drive_label_info { + DL_CANT_OPEN, /* invalid partition */ + DL_NOT_OURS, /* valid partition, but no vinum label */ + DL_DELETED_LABEL, /* valid partition, deleted label found */ + DL_WRONG_DRIVE, /* drive name doesn't match */ + DL_OURS /* valid partition and label found */ +}; + +/*** Drive definitions ***/ +/* A drive corresponds to a disk slice. We use a different term to show + * the difference in usage: it doesn't have to be a slice, and could + * theroretically be a complete, unpartitioned disk */ + +struct drive { + enum drivestate state; /* current state */ + int subdisks_allocated; /* number of entries in sd */ + int subdisks_used; /* and the number used */ + int blocksize; /* size of fs blocks */ + u_int64_t sectors_available; /* number of sectors still available */ + int secsperblock; + int lasterror; /* last error on drive */ + int driveno; /* index of drive in vinum_conf */ + int opencount; /* number of up subdisks */ + u_int64_t reads; /* number of reads on this drive */ + u_int64_t writes; /* number of writes on this drive */ + u_int64_t bytes_read; /* number of bytes read */ + u_int64_t bytes_written; /* number of bytes written */ + dev_t dev; /* and device number */ + char devicename[MAXDRIVENAME]; /* name of the slice it's on */ + struct vnode *vp; /* vnode pointer */ + struct proc *p; + struct vinum_label label; /* and the label information */ + struct partinfo partinfo; /* partition information */ + int freelist_size; /* number of entries alloced in free list */ + int freelist_entries; /* number of entries used in free list */ + struct drive_freelist { /* sorted list of free space on drive */ + u_int64_t offset; + long sectors; + } *freelist; +}; + +/*** Subdisk definitions ***/ + +struct sd { + enum sdstate state; /* state */ + /* offsets in blocks */ + int64_t driveoffset; /* offset on drive */ + int64_t plexoffset; /* offset in plex */ + u_int64_t sectors; /* and length in sectors */ + int plexno; /* index of plex, if it belongs */ + int driveno; /* index of the drive on which it is located */ + int sdno; /* our index in vinum_conf */ + int pid; /* pid of process which opened us */ + u_int64_t reads; /* number of reads on this subdisk */ + u_int64_t writes; /* number of writes on this subdisk */ + u_int64_t bytes_read; /* number of bytes read */ + u_int64_t bytes_written; /* number of bytes written */ + char name[MAXSDNAME]; /* name of subdisk */ +}; + +/*** Plex definitions ***/ + +/* kinds of plex organization */ +enum plexorg { + plex_disorg, /* disorganized */ + plex_concat, /* concatenated plex */ + plex_striped, /* striped plex */ + plex_raid5 /* RAID5 plex */ +}; + +/* Region in plex (either defective or unmapped) */ +struct plexregion { + u_int64_t offset; /* start of region */ + u_int64_t length; /* length */ +}; + +struct plex { + enum plexorg organization; /* Plex organization */ + enum plexstate state; /* and current state */ + u_int64_t length; /* total length of plex (max offset) */ + int flags; + int stripesize; /* size of stripe or raid band, in sectors */ + int subdisks; /* number of associated subdisks */ + int subdisks_allocated; /* number of subdisks allocated space for */ + int *sdnos; /* list of component subdisks */ + int plexno; /* index of plex in vinum_conf */ + int volno; /* index of volume */ + int volplexno; /* number of plex in volume */ + int pid; /* pid of process which opened us */ + /* Lock information */ + int locks; /* number of locks used */ + int alloclocks; /* number of locks allocated */ + struct rangelock *lock; /* ranges of locked addresses */ + /* Statistics */ + u_int64_t reads; /* number of reads on this plex */ + u_int64_t writes; /* number of writes on this plex */ + u_int64_t bytes_read; /* number of bytes read */ + u_int64_t bytes_written; /* number of bytes written */ + u_int64_t multiblock; /* requests that needed more than one block */ + u_int64_t multistripe; /* requests that needed more than one stripe */ + /* revive parameters */ + u_int64_t revived; /* block number of current revive request */ + int revive_blocksize; /* revive block size (bytes) */ + int revive_interval; /* and time to wait between transfers */ + struct request *waitlist; /* list of requests waiting on revive op */ + /* geometry control */ + int defective_regions; /* number of regions which are defective */ + int defective_region_count; /* number of entries in defective_region */ + struct plexregion *defective_region; /* list of offset/length pairs: defective sds */ + int unmapped_regions; /* number of regions which are missing */ + int unmapped_region_count; /* number of entries in unmapped_region */ + struct plexregion *unmapped_region; /* list of offset/length pairs: missing sds */ + char name[MAXPLEXNAME]; /* name of plex */ +}; + +/*** Volume definitions ***/ + + +struct volume { + enum volumestate state; /* current state */ + int plexes; /* number of plexes */ + int preferred_plex; /* plex to read from, -1 for round-robin */ + int last_plex_read; /* index of plex used for last read, + * for round-robin */ + dev_t devno; /* device number */ + int flags; /* status and configuration flags */ + int opencount; /* number of opens (all the same process) */ + int openflags; /* flags supplied to last open(2) */ + u_int64_t size; /* size of volume */ + int disk; /* disk index */ + int blocksize; /* logical block size */ + int active; /* number of outstanding requests active */ + int subops; /* and the number of suboperations */ + pid_t pid; /* pid of locker */ + /* Statistics */ + u_int64_t bytes_read; /* number of bytes read */ + u_int64_t bytes_written; /* number of bytes written */ + u_int64_t reads; /* number of reads on this volume */ + u_int64_t writes; /* number of writes on this volume */ + u_int64_t recovered_reads; /* reads recovered from another plex */ + /* Unlike subdisks in the plex, space for the plex pointers is static */ + int plex[MAXPLEX]; /* index of plexes */ + char name[MAXVOLNAME]; /* name of volume */ + struct disklabel label; /* for DIOCGPART */ +}; + +/* Table expansion. Expand table, which contains oldcount + * entries of type element, by increment entries, and change + * oldcount accordingly */ +#define EXPAND(table, element, oldcount, increment) \ +{ \ + expand_table ((void **) &table, \ + oldcount * sizeof (element), \ + (oldcount + increment) * sizeof (element) ); \ + oldcount += increment; \ + } + +/* Information on vinum's memory usage */ +struct meminfo { + int mallocs; /* number of malloced blocks */ + int total_malloced; /* total amount malloced */ + int highwater; /* maximum number of mallocs */ + struct mc *malloced; /* pointer to kernel table */ +}; + +struct mc { + int seq; + int size; + short line; + short flags; +#define ALLOC_KVA 1 /* allocated via kva calls */ + int *databuf; /* really vm_object_t */ + caddr_t address; + char file[16]; +}; + +/* These enums are used by the state transition + * routines. They're in bit map format: + * + * Bit 0: Other plexes in the volume are down + * Bit 1: Other plexes in the volume are up + * Bit 2: The current plex is up + * Maybe they should be local to + * state.c */ +enum volplexstate { + volplex_onlyusdown = 0, /* we're the only plex, and we're down */ + volplex_alldown, /* 1: another plex is down, and so are we */ + volplex_otherup, /* 2: another plex is up */ + volplex_otherupdown, /* other plexes are up and down */ + volplex_onlyus, /* 4: we're up and alone */ + volplex_onlyusup, /* only we are up, others are down */ + volplex_allup, /* all plexes are up */ + volplex_someup /* some plexes are up, including us */ +}; + +/* state map for plex */ +enum sdstates { + sd_emptystate = 1, + sd_downstate = 2, /* found an SD which is down */ + sd_crashedstate = 4, /* found an SD which is crashed */ + sd_obsoletestate = 8, /* found an SD which is obsolete */ + sd_stalestate = 16, /* found an SD which is stale */ + sd_rebornstate = 32, /* found an SD which is reborn */ + sd_upstate = 64, /* found an SD which is up */ + sd_initstate = 128, /* found an SD which is init */ + sd_otherstate = 256 /* found an SD in some other state */ +}; + +/* This is really just a parameter to pass to + * set_<foo>_state, but since it needs to be known + * in the external definitions, we need to define + * it here */ +enum setstateflags { + setstate_none = 0, /* no flags */ + setstate_force = 1, /* force the state change */ + setstate_configuring = 2, /* we're currently configuring, don't save */ + setstate_recursing = 4, /* we're called from another setstate function */ + setstate_norecurse = 8, /* don't call other setstate functions */ + setstate_noupdate = 16 /* don't update config */ +}; + +#ifdef DEBUG +/* Debugging stuff */ +#define DEBUG_ADDRESSES 1 +#define DEBUG_NUMOUTPUT 2 +#define DEBUG_RESID 4 /* go into debugger in complete_rqe */ +#define DEBUG_LASTREQS 8 /* keep a circular buffer of last requests */ +#define DEBUG_REMOTEGDB 256 /* go into remote gdb */ +#endif |