summaryrefslogtreecommitdiffstats
path: root/sys/dev/vinum/request.h
blob: 4e10748f06b2634508893862f81fa17d9b29b888 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
/*-
 * Copyright (c) 1997, 1998
 *	Nan Yang Computer Services Limited.  All rights reserved.
 *
 *  This software is distributed under the so-called ``Berkeley
 *  License'':
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Nan Yang Computer
 *      Services Limited.
 * 4. Neither the name of the Company nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * This software is provided ``as is'', and any express or implied
 * warranties, including, but not limited to, the implied warranties of
 * merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall the company or contributors be liable for any
 * direct, indirect, incidental, special, exemplary, or consequential
 * damages (including, but not limited to, procurement of substitute
 * goods or services; loss of use, data, or profits; or business
 * interruption) however caused and on any theory of liability, whether
 * in contract, strict liability, or tort (including negligence or
 * otherwise) arising in any way out of the use of this software, even if
 * advised of the possibility of such damage.
 *
 * $Id: request.h,v 1.21 2001/05/23 23:03:23 grog Exp grog $
 * $FreeBSD$
 */

/*
 * Flag bits describing a transfer.  The first
 * five values name the kind of operation; the
 * composite masks at the end of the list group
 * those kinds by what they need.  The remaining
 * bits qualify the request or its buffer.
 */

enum xferinfo {
    /* kinds of operation */
    XFR_NORMAL_READ = 0x0001,
    XFR_NORMAL_WRITE = 0x0002,				    /* normal-mode write request */
    XFR_RECOVERY_READ = 0x0004,
    XFR_DEGRADED_WRITE = 0x0008,
    XFR_PARITYLESS_WRITE = 0x0010,
    /* qualifiers */
    XFR_NO_PARITY_STRIPE = 0x0020,			    /* the parity stripe is unavailable */
    XFR_DATA_BLOCK = 0x0040,				    /* request contains a data block */
    XFR_PARITY_BLOCK = 0x0080,				    /* request contains a parity block */
    XFR_BAD_SUBDISK = 0x0100,				    /* the subdisk is dead */
    XFR_MALLOCED = 0x0200,				    /* the buffer was malloced */
    /* 0x0400 is not assigned in this enum */
#ifdef VINUMDEBUG
    XFR_PHASE2 = 0x0800,				    /* 2nd phase write; documentation only */
#endif
    XFR_REVIVECONFLICT = 0x1000,			    /* may conflict with a revive operation */
    XFR_BUFLOCKED = 0x2000,				    /* BUF_LOCK has been done on this buffer */
    XFR_COPYBUF = 0x4000,				    /* the data buffer was copied */

    /* kinds of operation that need a parity block */
    XFR_PARITYOP = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),
    /* kinds of operation that use the group parameters */
    XFR_GROUPOP = (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE),
    /* kinds of operation that use the data parameters */
    XFR_DATAOP = (XFR_NORMAL_READ | XFR_NORMAL_WRITE | XFR_PARITYLESS_WRITE),
    /* kinds of operation that must read before they write */
    XFR_RBW = (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE),
    /* kinds of operation that need a malloced buffer */
    XFR_NEEDS_MALLOC = (XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)
};

/*
 * Describe one low-level request, part of a
 * high-level request.  This is an extended
 * struct buf buffer, and the first element
 * *must* be a struct buf.  We pass this
 * structure to the I/O routines instead of a
 * struct buf in order to be able to locate the
 * high-level request when it completes: rqg
 * points to our group, and the group's rq
 * member points to the struct request.
 *
 * All offsets and lengths are in sectors.
 */

struct rqelement {
    struct buf b;					    /* buf structure; must stay the first member (see above) */
    struct rqgroup *rqg;				    /* pointer to our group */
    /* Information about the transfer */
    daddr_t sdoffset;					    /* offset in subdisk */
    int useroffset;					    /* offset in user buffer of normal data */
    /*
     * dataoffset and datalen refer to "individual" data
     * transfers which involve only this drive (normal read,
     * parityless write) and also degraded write.
     *
     * groupoffset and grouplen refer to the other "group"
     * operations (normal write, recovery read) which involve
     * more than one drive.  Both the offsets are relative to
     * the start of the local buffer.
     *
     * NOTE(review): enum xferinfo defines XFR_DATAOP as
     * normal read, normal write and parityless write, and
     * XFR_GROUPOP as degraded write and recovery read.  That
     * swaps "normal write" and "degraded write" relative to
     * the description above -- confirm which one is intended.
     */
    int dataoffset;					    /* offset in buffer of the normal data */
    int groupoffset;					    /* offset in buffer of group data */
    short datalen;					    /* length of normal data (sectors) */
    short grouplen;					    /* length of group data (sectors) */
    short buflen;					    /* total buffer length to allocate */
    /*
     * flags is stored as a short rather than as enum
     * xferinfo; the largest value defined in that enum,
     * XFR_COPYBUF (0x4000), still fits in a signed
     * 16-bit short.
     */
    short flags;					    /* really enum xferinfo (see above) */
    /* Ways to find other components */
    short sdno;						    /* subdisk number */
    short driveno;					    /* drive number */
};

/*
 * A group of requests built to satisfy an I/O
 * transfer on a single plex.
 *
 * The rqelements are stored in the same
 * allocation, directly after the fixed part of
 * this structure, so whoever allocates a group
 * must add room for them, and rqe must remain
 * the last member.  rqe is declared as a C99
 * flexible array member rather than with the
 * GNU zero-length array extension; the size of
 * the structure and the offset of rqe are the
 * same either way.
 */
struct rqgroup {
    struct rqgroup *next;				    /* pointer to next group */
    struct request *rq;					    /* pointer to the request */
    short count;					    /* number of requests in this group */
    short active;					    /* and number active */
    short plexno;					    /* index of plex */
    int badsdno;					    /* index of bad subdisk or -1 */
    enum xferinfo flags;				    /* description of transfer */
    struct rangelock *lock;				    /* lock for this transfer */
    daddr_t lockbase;					    /* and lock address */
    struct rqelement rqe[];				    /* and the elements of this request; must be last */
};

/*
 * Describe one high-level request and the
 * work we have to do to satisfy it.  The work
 * is held as a list of request groups: rqg
 * points to the first group and lrqg to the
 * last, and each group links to the following
 * one through its own next member.
 */
struct request {
    struct buf *bp;					    /* pointer to the high-level request */
    caddr_t save_data;					    /* for copied write buffers */
    enum xferinfo flags;				    /* transfer flags (enum xferinfo) */
    /*
     * Index of the object the request is for.
     * NOTE(review): presumably isplex below says
     * which member is valid -- confirm.
     */
    union {
	int volno;					    /* volume index */
	int plexno;					    /* or plex index */
    } volplex;
    int error;						    /* current error indication */
    int sdno;						    /* reviving subdisk (XFR_REVIVECONFLICT) */
    short isplex;					    /* set if this is a plex request */
    short active;					    /* number of subrequests still active */
    struct rqgroup *rqg;				    /* pointer to the first group of requests */
    struct rqgroup *lrqg;				    /* and to the last group of requests */
    struct request *next;				    /* link of waiting requests */
};

/*
 * Extended buffer header for subdisk I/O.  Includes
 * a pointer to the user I/O request.
 *
 * As in struct rqelement, the struct buf is the
 * first member.  NOTE(review): presumably for the
 * same reason (so that the extended header can be
 * found from the struct buf) -- confirm before
 * reordering members.
 */
struct sdbuf {
    struct buf b;					    /* our buffer */
    struct buf *bp;					    /* and pointer to parent */
    short driveno;					    /* drive index */
    short sdno;						    /* and subdisk index */
};

/*
 * Outcome of building a request.  The values
 * are ordered by increasing seriousness, and
 * some routines test for > REQUEST_RECOVERED
 * to detect a failed request, so the numeric
 * order is part of the contract: do not
 * reorder or renumber.  XXX
 */
enum requeststatus {
    REQUEST_OK = 0,					    /* the request was built without problems */
    REQUEST_RECOVERED = 1,				    /* OK, but RAID5 recovery is involved */
    REQUEST_DEGRADED = 2,				    /* some parts of the request failed */
    REQUEST_EOF = 3,					    /* some parts failed: outside the plex */
    REQUEST_DOWN = 4,					    /* everything failed: subdisk(s) down */
    REQUEST_ENOMEM = 5					    /* everything failed: out of memory */
};

#ifdef VINUMDEBUG
/*
 * Kind of event recorded in a request trace
 * entry (DEBUG_LASTREQS).  loginfo_unused is
 * pinned to zero; the others simply count up
 * from there.
 */
enum rqinfo_type {
    loginfo_unused = 0,					    /* slot has never been used */
    loginfo_user_bp,					    /* the bp at the time strategy is called */
    loginfo_user_bpl,					    /* the bp at launch time */
    loginfo_rqe,					    /* user RQE */
    loginfo_iodone,					    /* iodone */
    loginfo_raid5_data,					    /* RAID-5 data block write */
    loginfo_raid5_parity,				    /* RAID-5 parity block write */
    loginfo_sdio,					    /* subdisk I/O */
    loginfo_sdiol,					    /* launch of subdisk I/O */
    loginfo_sdiodone,					    /* subdisk iodone */
    loginfo_lockwait,					    /* waiting for a range lock */
    loginfo_lock,					    /* range locked */
    loginfo_unlock					    /* range unlocked */
};

/*
 * This is the rangelock structure with an added
 * buffer pointer and plex number.  We don't need
 * the plex number for the locking protocol, but
 * it does help a lot when logging.
 *
 * NOTE(review): struct rangelock itself is not
 * declared in this header, so the statement that
 * the leading members match it cannot be checked
 * here -- confirm against its definition.
 */
struct rangelockinfo {
    daddr_t stripe;					    /* address + 1 of the range being locked */
    struct buf *bp;					    /* user's buffer pointer */
    int plexno;						    /* plex number; only of use for logging (see above) */
};

/*
 * Argument to logrq: a pointer to whatever is
 * being logged.  NOTE(review): presumably the
 * enum rqinfo_type passed alongside tells logrq
 * which member to use -- confirm against the
 * definition of logrq.
 */
union rqinfou {						    /* info to pass to logrq */
    struct buf *bp;					    /* a buffer header */
    struct rqelement *rqe;				    /* address of request, for correlation */
    struct rangelockinfo *lockinfo;			    /* range lock details (see struct rangelockinfo) */
};

/*
 * One entry of the request trace.  Besides the
 * event type, timestamp and device numbers, it
 * holds a complete copy (not a pointer) of the
 * object the event refers to, in the info union.
 *
 * NOTE(review): info.lockinfo is declared as
 * struct rangelock, whereas union rqinfou hands
 * logrq a pointer to struct rangelockinfo, which
 * is described as a rangelock with extra members.
 * Confirm that storing only the rangelock part is
 * intended.
 */
struct rqinfo {
    enum rqinfo_type type;				    /* kind of event */
    struct timeval timestamp;				    /* time it happened */
    struct buf *bp;					    /* point to user buffer */
    int devmajor;					    /* major and minor device info */
    int devminor;
    union {
	struct buf b;					    /* yup, the *whole* buffer header */
	struct rqelement rqe;				    /* and the whole rqe */
	struct rangelock lockinfo;			    /* range lock details (see NOTE above) */
    } info;
};

#define RQINFO_SIZE 128					    /* number of info slots in buffer */

/*
 * Record a trace entry.  type is the kind of
 * event and info points to the object involved
 * (see union rqinfou).  NOTE(review): ubp is
 * presumably the user's buffer that ends up in
 * rqinfo.bp -- confirm against the definition,
 * which is not in this header.
 */
void logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp);
#endif

/* Structures for the daemon */

/*
 * Kinds of request that can be passed to the
 * daemon.  daemonrq_none is pinned to zero; the
 * others count up from there.
 */
enum daemonrq {
    daemonrq_none = 0,					    /* dummy value, there to catch bugs */
    daemonrq_ioerror,					    /* an I/O error occurred */
    daemonrq_saveconfig,				    /* save the configuration */
    daemonrq_return,					    /* return to userland */
    daemonrq_ping,					    /* show a sign of life */
    daemonrq_init,					    /* initialize a plex */
    daemonrq_revive,					    /* revive a subdisk */
    daemonrq_closedrive					    /* close a drive */
};

/*
 * info field for daemon requests.  The member
 * comments name the enum daemonrq request type
 * each member belongs to.
 */
union daemoninfo {					    /* and the request information */
    struct request *rq;					    /* for daemonrq_ioerror */
    struct sd *sd;					    /* for daemonrq_revive */
    struct plex *plex;					    /* for daemonrq_init */
    struct drive *drive;				    /* for daemonrq_closedrive */
    int nothing;					    /* for passing NULL */
};

/*
 * One element of the daemon's request queue:
 * a request type, its argument, and the link
 * to the following element.
 */
struct daemonq {
    struct daemonq *next;				    /* pointer to next element in queue */
    enum daemonrq type;					    /* type of request */
    int privateinuse;					    /* private element, being used */
    union daemoninfo info;				    /* and the request information */
};

/*
 * Pass a request to the daemon.  type selects
 * what is asked for and info carries the
 * matching argument (see union daemoninfo).
 * Defined elsewhere.
 */
void queue_daemon_request(enum daemonrq type, union daemoninfo info);

/*
 * NOTE(review): presumably holds an OR of enum
 * daemon_option bits -- confirm at the point of
 * definition.
 */
extern int daemon_options;

/*
 * Daemon option flags.  Every value is a
 * distinct single bit, so they can be combined
 * in one int.
 */
enum daemon_option {
    daemon_verbose = 0x01,				    /* report what we are doing */
    daemon_stopped = 0x02,
    daemon_noupdate = 0x04				    /* for recovery: leave the on-disk config alone */
};

/* Free a high-level request.  Defined elsewhere. */
void freerq(struct request *rq);
/*
 * Release a range lock.  plexno is the index of
 * the plex and lock the lock to release.  The
 * second parameter is now named, like those of
 * the other prototypes in this header.  Defined
 * elsewhere.
 */
void unlockrange(int plexno, struct rangelock *lock);
/* Local Variables: */
/* fill-column: 50 */
/* End: */
OpenPOWER on IntegriCloud