summaryrefslogtreecommitdiffstats
path: root/sys/geom/sched/gs_scheduler.h
blob: fa9cb14a9fdb1cbd674afdb2896eb76e54e8fa66 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
/*-
 * Copyright (c) 2009-2010 Fabio Checconi, Luigi Rizzo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD$
 *
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * defined below.
 */

#ifndef	_G_GSCHED_H_
#define	_G_GSCHED_H_

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <geom/geom.h>
#include "g_sched.h"

/*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *    starts being used by a geom 'sched'
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 if success, or return non-zero
 *    in case of failure (meaning the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *    gs_start(), or on timeouts or 'done' events. It should return
 *    immediately, either a pointer to the bio to be served or NULL
 *    if no bio should be served now.  If force is specified, a
 *    work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *    In turn the scheduler may decide to call the dispatch loop
 *    to serve other pending requests (or make sure there is a pending
 *    timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *    the classifier) starts being used.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *    destroyed; after this call, the scheduler is supposed to hold no
 *    more references to the elements in the table.
 */

/* Forward declarations for prototypes. */
struct g_geom;
struct g_sched_class;

/*
 * Function types of the methods stored in struct g_gsched.  The
 * interface description earlier in this file says when each one is
 * invoked.  The `data' argument is presumably the pointer returned by
 * the gs_init() method -- TODO confirm against g_sched.c.
 */

/* Called when the algorithm starts being used by a geom 'sched'. */
typedef void *gs_init_t (struct g_geom *geom);
/* Called when the algorithm is released. */
typedef void gs_fini_t (void *data);
/*
 * Called for each new request: returns 0 if the request was enqueued,
 * non-zero on failure (the request is then passed down).
 */
typedef int gs_start_t (void *data, struct bio *bio);
/* Called when a request under service completes. */
typedef void gs_done_t (void *data, struct bio *bio);
/*
 * Returns the bio to be served now, or NULL if none should be served.
 * A work-conserving behavior is expected when force is specified.
 */
typedef struct bio *gs_next_t (void *data, int force);
/* Called when a new client (as determined by the classifier) is first used. */
typedef int gs_init_class_t (void *data, void *priv);
/*
 * NOTE(review): not described in this header; presumably the teardown
 * counterpart of gs_init_class_t -- confirm against g_sched.c.
 */
typedef void gs_fini_class_t (void *data, void *priv);
/* Called right before the class hashtable is destroyed. */
typedef void gs_hash_unref_t (void *data);

/*
 * Descriptor of one scheduling algorithm: a few bookkeeping fields
 * followed by the table of methods the algorithm implements.  The file
 * header comment asks each scheduler module to provide all of these
 * methods and to register itself with DECLARE_GSCHED_MODULE.
 */
struct g_gsched {
	/* Presumably the name the algorithm is known by -- TODO confirm. */
	const char	*gs_name;
	/* Presumably a count of current users -- TODO confirm. */
	int		gs_refs;
	/*
	 * Presumably the size in bytes of the per-class private data
	 * that follows struct g_sched_class -- TODO confirm.
	 */
	int		gs_priv_size;

	/* Per-instance methods; see the gs_*_t typedefs for their types. */
	gs_init_t	*gs_init;
	gs_fini_t	*gs_fini;
	gs_start_t	*gs_start;
	gs_done_t	*gs_done;
	gs_next_t	*gs_next;
	/* g_dumpconf_t is not declared here; presumably from <geom/geom.h>. */
	g_dumpconf_t	*gs_dumpconf;

	/* Per-class (per-client) methods. */
	gs_init_class_t	*gs_init_class;
	gs_fini_class_t	*gs_fini_class;
	gs_hash_unref_t *gs_hash_unref;

	/* Linkage for a list of g_gsched descriptors. */
	LIST_ENTRY(g_gsched) glist;
};

/*
 * KTR class for the schedulers, mapped onto the spare class KTR_SPARE4
 * (presumably for their trace points -- TODO confirm the users).
 */
#define	KTR_GSCHED	KTR_SPARE4

/* Declare the M_GEOM_SCHED malloc type; it is not defined in this header. */
MALLOC_DECLARE(M_GEOM_SCHED);

/*
 * Basic classification mechanism.  Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair.  The
 * private data have a base type of g_sched_private, and are
 * extended at the end with the actual private fields of each
 * scheduler.
 */
struct g_sched_class {
	/* Reference count; g_sched_priv_ref() increments it. */
	int	gsc_refs;
	/* NOTE(review): presumably an expiration time/tick -- confirm. */
	int	gsc_expire;
	/* NOTE(review): presumably the classifier key of the class -- confirm. */
	u_long	gsc_key;
	/* Linkage for a list of classes. */
	LIST_ENTRY(g_sched_class) gsc_clist;

	/*
	 * Start of the scheduler's private data, which is laid out right
	 * after the fixed part of the structure; g_sched_priv2class()
	 * relies on this offset to get back from the private data to the
	 * class.  Declared as a C99 flexible array member instead of a
	 * GNU zero-length array: sizeof and offsetof are unchanged.
	 */
	void	*gsc_priv[];
};

/*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The value returned by g_sched_get_class() points to the private data
 * for the class, not to the struct g_sched_class itself.
 */
void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
void g_sched_put_class(struct g_geom *gp, void *priv);

static inline struct g_sched_class *
g_sched_priv2class(void *priv)
{

	return ((struct g_sched_class *)((u_long)priv -
	    offsetof(struct g_sched_class, gsc_priv)));
}

static inline void
g_sched_priv_ref(void *priv)
{
	struct g_sched_class *gsc;

	gsc = g_sched_priv2class(priv);
	gsc->gsc_refs++;
}

/*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout), that code must
 * use these functions around the access; gp identifies the instance.
 */
void g_sched_lock(struct g_geom *gp);
void g_sched_unlock(struct g_geom *gp);

/*
 * Restart request dispatching.  Must be called with the per-instance
 * mutex held.  As described in the interface notes earlier in this
 * file, this is the loop from which the scheduler's gs_next() method
 * is called.
 */
void g_sched_dispatch(struct g_geom *geom);

/*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * Both fields are decayed accumulators: gs_avg accumulates the samples
 * and gs_smpl accumulates a count of 1 per sample, each losing 1/8 of
 * its value at every update.  The average is the ratio of the two.
 * A zeroed structure represents an empty average.
 */
struct g_savg {
	uint64_t	gs_avg;		/* decayed sum of the samples */
	unsigned int	gs_smpl;	/* decayed count of the samples */
};

/*
 * Account one more sample in ss.
 */
static inline void
g_savg_add_sample(struct g_savg *ss, uint64_t sample)
{

	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
	/* Starting from zero this counts 1, 2, ... and then settles at 8. */
	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
}

/*
 * Return non-zero if ss has seen enough samples to be meaningful.
 */
static inline int
g_savg_valid(struct g_savg *ss)
{

	/* We want at least 8 samples to deem an average as valid. */
	return (ss->gs_smpl > 7);
}

/*
 * Return the current average held in ss, or 0 if no sample has been
 * added yet.  Callers that need a meaningful value should check
 * g_savg_valid() first.
 */
static inline uint64_t
g_savg_read(struct g_savg *ss)
{

	/* An empty average has gs_smpl == 0: do not divide by zero. */
	if (ss->gs_smpl == 0)
		return (0);

	return (ss->gs_avg / ss->gs_smpl);
}

/*
 * Declaration of a scheduler module.
 *
 * g_gsched_modevent() is the module event handler that every module
 * declared through the macro below ends up using; it is only declared
 * here, not defined.
 *
 * DECLARE_GSCHED_MODULE(name, gsched) expands to:
 *  - a static moduledata_t called <name>_mod, initialized positionally
 *    with the string "<name>", g_gsched_modevent and gsched;
 *  - a DECLARE_MODULE() for it, at SI_SUB_DRIVERS / SI_ORDER_MIDDLE;
 *  - a MODULE_DEPEND() of <name> on geom_sched.
 * The expansion already ends with a semicolon.
 *
 * NOTE(review): gsched is presumably a pointer to the module's
 * struct g_gsched -- confirm against the scheduler modules.
 */
int g_gsched_modevent(module_t mod, int cmd, void *arg);

#define	DECLARE_GSCHED_MODULE(name, gsched)			\
	static moduledata_t name##_mod = {			\
		#name,						\
		g_gsched_modevent,				\
		gsched,						\
	};							\
	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
	MODULE_DEPEND(name, geom_sched, 0, 0, 0);

#endif	/* _KERNEL */

#endif	/* _G_GSCHED_H_ */
OpenPOWER on IntegriCloud