summaryrefslogtreecommitdiffstats
path: root/sys/geom/raid3
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2006-03-13 01:03:18 +0000
committerpjd <pjd@FreeBSD.org>2006-03-13 01:03:18 +0000
commit349adc9b525fdf960528de1a0ae2e4150e9c529b (patch)
tree7aaf1fa2479fdff44934cd926946dc46c265bb56 /sys/geom/raid3
parentcc69a08fbfbd64dd13ebb80b18cc83824a3c8781 (diff)
downloadFreeBSD-src-349adc9b525fdf960528de1a0ae2e4150e9c529b.zip
FreeBSD-src-349adc9b525fdf960528de1a0ae2e4150e9c529b.tar.gz
- Reimplement I/O data allocation to prevent deadlocks.
Submitted by: green - Speed up synchronization process by using configurable number of I/O requests in parallel. + Add kern.geom.raid3.sync_requests tunable which defines how many parallel I/O requests should be used. + Retire kern.geom.raid3.reqs_per_sync and kern.geom.raid3.syncs_per_sec sysctls. - Fix race between regular and synchronization requests. - Reimplement raid3's data synchronization - do not use the topology lock for this purpose, as it may case deadlocks. - Stop synchronization from pre-sync hook. - Fix some other minor issues. Tested by: Mike Tancsa <mike@sentex.net> MFC after: 3 days
Diffstat (limited to 'sys/geom/raid3')
-rw-r--r--sys/geom/raid3/g_raid3.c812
-rw-r--r--sys/geom/raid3/g_raid3.h47
-rw-r--r--sys/geom/raid3/g_raid3_ctl.c165
3 files changed, 672 insertions, 352 deletions
diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c
index bbfe165..edeb0f3 100644
--- a/sys/geom/raid3/g_raid3.c
+++ b/sys/geom/raid3/g_raid3.c
@@ -62,19 +62,15 @@ static u_int g_raid3_idletime = 5;
TUNABLE_INT("kern.geom.raid3.idletime", &g_raid3_idletime);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RW,
&g_raid3_idletime, 0, "Mark components as clean when idling");
-static u_int g_raid3_reqs_per_sync = 5;
-SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
- &g_raid3_reqs_per_sync, 0,
- "Number of regular I/O requests per synchronization request");
-static u_int g_raid3_syncs_per_sec = 1000;
-SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
- &g_raid3_syncs_per_sec, 0,
- "Number of synchronizations requests per second");
static u_int g_raid3_disconnect_on_failure = 1;
TUNABLE_INT("kern.geom.raid3.disconnect_on_failure",
&g_raid3_disconnect_on_failure);
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
&g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
+static u_int g_raid3_syncreqs = 4;
+TUNABLE_INT("kern.geom.raid3.sync_requests", &g_raid3_syncreqs);
+SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
+ &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_raid3_n64k = 50;
TUNABLE_INT("kern.geom.raid3.n64k", &g_raid3_n64k);
@@ -94,24 +90,6 @@ SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0,
static u_int g_raid3_parity_mismatch = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD,
&g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode");
-static u_int g_raid3_64k_requested = 0;
-SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_requested, CTLFLAG_RD,
- &g_raid3_64k_requested, 0, "Number of requested 64kB allocations");
-static u_int g_raid3_64k_failed = 0;
-SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_failed, CTLFLAG_RD,
- &g_raid3_64k_failed, 0, "Number of failed 64kB allocations");
-static u_int g_raid3_16k_requested = 0;
-SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_requested, CTLFLAG_RD,
- &g_raid3_16k_requested, 0, "Number of requested 16kB allocations");
-static u_int g_raid3_16k_failed = 0;
-SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 16k_failed, CTLFLAG_RD,
- &g_raid3_16k_failed, 0, "Number of failed 16kB allocations");
-static u_int g_raid3_4k_requested = 0;
-SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_requested, CTLFLAG_RD,
- &g_raid3_4k_requested, 0, "Number of requested 4kB allocations");
-static u_int g_raid3_4k_failed = 0;
-SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_failed, CTLFLAG_RD,
- &g_raid3_4k_failed, 0, "Number of failed 4kB allocations");
#define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \
G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \
@@ -119,7 +97,7 @@ SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 4k_failed, CTLFLAG_RD,
G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \
} while (0)
-static eventhandler_tag g_raid3_ehtag = NULL;
+static eventhandler_tag g_raid3_pre_sync = NULL, g_raid3_post_sync = NULL;
static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp,
struct g_geom *gp);
@@ -144,6 +122,8 @@ static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force);
static void g_raid3_dumpconf(struct sbuf *sb, const char *indent,
struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type);
+static int g_raid3_register_request(struct bio *pbp);
+static void g_raid3_sync_release(struct g_raid3_softc *sc);
static const char *
@@ -195,6 +175,25 @@ g_raid3_get_diskname(struct g_raid3_disk *disk)
return (disk->d_name);
}
+static int
+g_raid3_uma_ctor(void *mem, int size, void *arg, int flags)
+{
+ struct g_raid3_zone *sz = arg;
+
+ if (sz->sz_inuse == sz->sz_max)
+ return (ENOMEM);
+ sz->sz_inuse++;
+ return (0);
+}
+
+static void
+g_raid3_uma_dtor(void *mem, int size, void *arg)
+{
+ struct g_raid3_zone *sz = arg;
+
+ sz->sz_inuse--;
+}
+
#define g_raid3_xor(src1, src2, dst, size) \
_g_raid3_xor((uint64_t *)(src1), (uint64_t *)(src2), \
(uint64_t *)(dst), (size_t)size)
@@ -284,18 +283,17 @@ g_raid3_event_send(void *arg, int state, int flags)
mtx_unlock(&sc->sc_queue_mtx);
if ((flags & G_RAID3_EVENT_DONTWAIT) != 0)
return (0);
- g_topology_assert();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
- g_topology_unlock();
+ sx_xunlock(&sc->sc_lock);
while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) {
mtx_lock(&sc->sc_events_mtx);
MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event",
hz * 5);
}
- /* Don't even try to use 'sc' here, because it could be already dead. */
- g_topology_lock();
error = ep->e_error;
g_raid3_event_free(ep);
+ sx_xlock(&sc->sc_lock);
return (error);
}
@@ -325,9 +323,9 @@ g_raid3_event_cancel(struct g_raid3_disk *disk)
struct g_raid3_softc *sc;
struct g_raid3_event *ep, *tmpep;
- g_topology_assert();
-
sc = disk->d_softc;
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
mtx_lock(&sc->sc_events_mtx);
TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0)
@@ -355,6 +353,8 @@ g_raid3_ndisks(struct g_raid3_softc *sc, int state)
struct g_raid3_disk *disk;
u_int n, ndisks;
+ sx_assert(&sc->sc_lock, SX_LOCKED);
+
for (n = ndisks = 0; n < sc->sc_ndisks; n++) {
disk = &sc->sc_disks[n];
if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
@@ -404,6 +404,8 @@ g_raid3_destroy_consumer(void *arg, int flags __unused)
{
struct g_consumer *cp;
+ g_topology_assert();
+
cp = arg;
G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
g_detach(cp);
@@ -456,17 +458,20 @@ g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp)
struct g_consumer *cp;
int error;
- g_topology_assert();
+ g_topology_assert_not();
KASSERT(disk->d_consumer == NULL,
("Disk already connected (device %s).", disk->d_softc->sc_name));
+ g_topology_lock();
cp = g_new_consumer(disk->d_softc->sc_geom);
error = g_attach(cp, pp);
if (error != 0) {
g_destroy_consumer(cp);
+ g_topology_unlock();
return (error);
}
error = g_access(cp, 1, 1, 1);
+ g_topology_unlock();
if (error != 0) {
g_detach(cp);
g_destroy_consumer(cp);
@@ -520,7 +525,6 @@ g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp,
disk->d_sync.ds_consumer = NULL;
disk->d_sync.ds_offset = md->md_sync_offset;
disk->d_sync.ds_offset_done = md->md_sync_offset;
- disk->d_sync.ds_resync = -1;
disk->d_genid = md->md_genid;
disk->d_sync.ds_syncid = md->md_syncid;
if (errorp != NULL)
@@ -533,12 +537,13 @@ g_raid3_destroy_disk(struct g_raid3_disk *disk)
{
struct g_raid3_softc *sc;
- g_topology_assert();
+ g_topology_assert_not();
+ sc = disk->d_softc;
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
return;
g_raid3_event_cancel(disk);
- sc = disk->d_softc;
switch (disk->d_state) {
case G_RAID3_DISK_STATE_SYNCHRONIZING:
if (sc->sc_syncdisk != NULL)
@@ -547,7 +552,9 @@ g_raid3_destroy_disk(struct g_raid3_disk *disk)
case G_RAID3_DISK_STATE_NEW:
case G_RAID3_DISK_STATE_STALE:
case G_RAID3_DISK_STATE_ACTIVE:
+ g_topology_lock();
g_raid3_disconnect_consumer(sc, disk->d_consumer);
+ g_topology_unlock();
disk->d_consumer = NULL;
break;
default:
@@ -567,7 +574,8 @@ g_raid3_destroy_device(struct g_raid3_softc *sc)
struct g_consumer *cp;
u_int n;
- g_topology_assert();
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
gp = sc->sc_geom;
if (sc->sc_provider != NULL)
@@ -596,17 +604,21 @@ g_raid3_destroy_device(struct g_raid3_softc *sc)
callout_drain(&sc->sc_callout);
gp->softc = NULL;
cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer);
+ g_topology_lock();
if (cp != NULL)
g_raid3_disconnect_consumer(sc, cp);
sc->sc_sync.ds_geom->softc = NULL;
g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
- uma_zdestroy(sc->sc_zone_64k);
- uma_zdestroy(sc->sc_zone_16k);
- uma_zdestroy(sc->sc_zone_4k);
- mtx_destroy(&sc->sc_queue_mtx);
- mtx_destroy(&sc->sc_events_mtx);
G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name);
g_wither_geom(gp, ENXIO);
+ g_topology_unlock();
+ uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone);
+ uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone);
+ uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone);
+ mtx_destroy(&sc->sc_queue_mtx);
+ mtx_destroy(&sc->sc_events_mtx);
+ sx_xunlock(&sc->sc_lock);
+ sx_destroy(&sc->sc_lock);
}
static void
@@ -633,13 +645,14 @@ g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
u_char *sector;
int error = 0;
- g_topology_assert();
-
+ g_topology_assert_not();
sc = disk->d_softc;
+ sx_assert(&sc->sc_lock, SX_LOCKED);
+
cp = disk->d_consumer;
KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
- KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
+ KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
cp->acw, cp->ace));
length = cp->provider->sectorsize;
@@ -647,9 +660,7 @@ g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO);
if (md != NULL)
raid3_metadata_encode(md, sector);
- g_topology_unlock();
error = g_write_data(cp, offset, sector, length);
- g_topology_lock();
free(sector, M_RAID3);
if (error != 0) {
if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {
@@ -678,7 +689,9 @@ g_raid3_clear_metadata(struct g_raid3_disk *disk)
{
int error;
- g_topology_assert();
+ g_topology_assert_not();
+ sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
+
error = g_raid3_write_metadata(disk, NULL);
if (error == 0) {
G_RAID3_DEBUG(2, "Metadata on %s cleared.",
@@ -731,10 +744,14 @@ g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
void
g_raid3_update_metadata(struct g_raid3_disk *disk)
{
+ struct g_raid3_softc *sc;
struct g_raid3_metadata md;
int error;
- g_topology_assert();
+ g_topology_assert_not();
+ sc = disk->d_softc;
+ sx_assert(&sc->sc_lock, SX_LOCKED);
+
g_raid3_fill_metadata(disk, &md);
error = g_raid3_write_metadata(disk, &md);
if (error == 0) {
@@ -753,7 +770,8 @@ g_raid3_bump_syncid(struct g_raid3_softc *sc)
struct g_raid3_disk *disk;
u_int n;
- g_topology_assert();
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
("%s called with no active disks (device=%s).", __func__,
sc->sc_name));
@@ -777,7 +795,8 @@ g_raid3_bump_genid(struct g_raid3_softc *sc)
struct g_raid3_disk *disk;
u_int n;
- g_topology_assert();
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
("%s called with no active disks (device=%s).", __func__,
sc->sc_name));
@@ -796,26 +815,27 @@ g_raid3_bump_genid(struct g_raid3_softc *sc)
}
static int
-g_raid3_idle(struct g_raid3_softc *sc, int from_access)
+g_raid3_idle(struct g_raid3_softc *sc, int acw)
{
struct g_raid3_disk *disk;
u_int i;
int timeout;
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
if (sc->sc_provider == NULL)
return (0);
if (sc->sc_idle)
return (0);
if (sc->sc_writes > 0)
return (0);
- if (!from_access && sc->sc_provider->acw > 0) {
+ if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
timeout = g_raid3_idletime - (time_uptime - sc->sc_last_write);
if (timeout > 0)
return (timeout);
}
sc->sc_idle = 1;
- if (!from_access)
- g_topology_lock();
for (i = 0; i < sc->sc_ndisks; i++) {
disk = &sc->sc_disks[i];
if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
@@ -825,8 +845,6 @@ g_raid3_idle(struct g_raid3_softc *sc, int from_access)
disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
g_raid3_update_metadata(disk);
}
- if (!from_access)
- g_topology_unlock();
return (0);
}
@@ -836,9 +854,11 @@ g_raid3_unidle(struct g_raid3_softc *sc)
struct g_raid3_disk *disk;
u_int i;
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
sc->sc_idle = 0;
sc->sc_last_write = time_uptime;
- g_topology_lock();
for (i = 0; i < sc->sc_ndisks; i++) {
disk = &sc->sc_disks[i];
if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
@@ -848,7 +868,6 @@ g_raid3_unidle(struct g_raid3_softc *sc)
disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
g_raid3_update_metadata(disk);
}
- g_topology_unlock();
}
/*
@@ -925,12 +944,9 @@ g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp)
pbp->bio_children--;
KASSERT(cbp->bio_data != NULL, ("NULL bio_data"));
size = pbp->bio_length / (sc->sc_ndisks - 1);
- if (size > 16384)
- uma_zfree(sc->sc_zone_64k, cbp->bio_data);
- else if (size > 4096)
- uma_zfree(sc->sc_zone_16k, cbp->bio_data);
- else
- uma_zfree(sc->sc_zone_4k, cbp->bio_data);
+ uma_zfree_arg(sc->sc_zones[g_raid3_zone(size)].sz_zone,
+ cbp->bio_data,
+ &sc->sc_zones[g_raid3_zone(size)]);
if (G_RAID3_HEAD_BIO(pbp) == cbp) {
G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp);
G_RAID3_NEXT_BIO(cbp) = NULL;
@@ -955,28 +971,21 @@ g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp)
{
struct bio *bp, *cbp;
size_t size;
+ int memflag;
cbp = g_clone_bio(pbp);
if (cbp == NULL)
return (NULL);
size = pbp->bio_length / (sc->sc_ndisks - 1);
- if (size > 16384) {
- cbp->bio_data = uma_zalloc(sc->sc_zone_64k, M_NOWAIT);
- g_raid3_64k_requested++;
- } else if (size > 4096) {
- cbp->bio_data = uma_zalloc(sc->sc_zone_16k, M_NOWAIT);
- g_raid3_16k_requested++;
- } else {
- cbp->bio_data = uma_zalloc(sc->sc_zone_4k, M_NOWAIT);
- g_raid3_4k_requested++;
- }
+ if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0)
+ memflag = M_WAITOK;
+ else
+ memflag = M_NOWAIT;
+ cbp->bio_data = uma_zalloc_arg(sc->sc_zones[g_raid3_zone(size)].sz_zone,
+ &sc->sc_zones[g_raid3_zone(size)], memflag);
+ sc->sc_zones[g_raid3_zone(size)].sz_requested++;
if (cbp->bio_data == NULL) {
- if (size > 16384)
- g_raid3_64k_failed++;
- else if (size > 4096)
- g_raid3_16k_failed++;
- else
- g_raid3_4k_failed++;
+ sc->sc_zones[g_raid3_zone(size)].sz_failed++;
pbp->bio_children--;
g_destroy_bio(cbp);
return (NULL);
@@ -1051,7 +1060,7 @@ g_raid3_scatter(struct bio *pbp)
cp = disk->d_consumer;
cbp->bio_to = cp->provider;
G_RAID3_LOGREQ(3, cbp, "Sending request.");
- KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
+ KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
cp->acr, cp->acw, cp->ace));
cp->index++;
@@ -1157,7 +1166,7 @@ g_raid3_gather(struct bio *pbp)
fbp->bio_caller2 = disk;
fbp->bio_to = cp->provider;
G_RAID3_LOGREQ(3, fbp, "Sending request (recover).");
- KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
+ KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
cp->acr, cp->acw, cp->ace));
cp->index++;
@@ -1308,6 +1317,9 @@ g_raid3_regular_request(struct bio *cbp)
G_RAID3_LOGREQ(0, pbp, "Request failed.");
pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED;
pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY;
+ bioq_remove(&sc->sc_inflight, pbp);
+ /* Release delayed sync requests if possible. */
+ g_raid3_sync_release(sc);
g_io_deliver(pbp, pbp->bio_error);
break;
}
@@ -1363,45 +1375,137 @@ g_raid3_start(struct bio *bp)
}
/*
- * Send one synchronization request.
+ * Return TRUE if the given request is colliding with a in-progress
+ * synchronization request.
*/
-static void
-g_raid3_sync_one(struct g_raid3_softc *sc)
+static int
+g_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp)
{
struct g_raid3_disk *disk;
- struct bio *bp;
+ struct bio *sbp;
+ off_t rstart, rend, sstart, send;
+ int i;
- KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
- ("Wrong device state (%s, %s).", sc->sc_name,
- g_raid3_device_state2str(sc->sc_state)));
disk = sc->sc_syncdisk;
- KASSERT(disk != NULL, ("No sync disk (%s).", sc->sc_name));
- KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING,
- ("Disk %s is not marked for synchronization.",
- g_raid3_get_diskname(disk)));
+ if (disk == NULL)
+ return (0);
+ rstart = bp->bio_offset;
+ rend = bp->bio_offset + bp->bio_length;
+ for (i = 0; i < g_raid3_syncreqs; i++) {
+ sbp = disk->d_sync.ds_bios[i];
+ if (sbp == NULL)
+ continue;
+ sstart = sbp->bio_offset;
+ send = sbp->bio_length;
+ if (sbp->bio_cmd == BIO_WRITE) {
+ sstart *= sc->sc_ndisks - 1;
+ send *= sc->sc_ndisks - 1;
+ }
+ send += sstart;
+ if (rend > sstart && rstart < send)
+ return (1);
+ }
+ return (0);
+}
- bp = g_new_bio();
- if (bp == NULL)
- return;
- bp->bio_parent = NULL;
- bp->bio_cmd = BIO_READ;
- bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1);
- bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
- bp->bio_cflags = 0;
- bp->bio_done = g_raid3_sync_done;
- bp->bio_data = disk->d_sync.ds_data;
- if (bp->bio_data == NULL) {
- g_destroy_bio(bp);
- return;
+/*
+ * Return TRUE if the given sync request is colliding with a in-progress regular
+ * request.
+ */
+static int
+g_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp)
+{
+ off_t rstart, rend, sstart, send;
+ struct bio *bp;
+
+ if (sc->sc_syncdisk == NULL)
+ return (0);
+ sstart = sbp->bio_offset;
+ send = sstart + sbp->bio_length;
+ TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
+ rstart = bp->bio_offset;
+ rend = bp->bio_offset + bp->bio_length;
+ if (rend > sstart && rstart < send)
+ return (1);
}
- bp->bio_cflags = G_RAID3_BIO_CFLAG_REGSYNC;
- disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
- bp->bio_to = sc->sc_provider;
- G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
- disk->d_sync.ds_consumer->index++;
- g_io_request(bp, disk->d_sync.ds_consumer);
+ return (0);
+}
+
+/*
+ * Puts request onto delayed queue.
+ */
+static void
+g_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp)
+{
+
+ G_RAID3_LOGREQ(2, bp, "Delaying request.");
+ bioq_insert_head(&sc->sc_regular_delayed, bp);
+}
+
+/*
+ * Puts synchronization request onto delayed queue.
+ */
+static void
+g_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp)
+{
+
+ G_RAID3_LOGREQ(2, bp, "Delaying synchronization request.");
+ bioq_insert_tail(&sc->sc_sync_delayed, bp);
+}
+
+/*
+ * Releases delayed regular requests which don't collide anymore with sync
+ * requests.
+ */
+static void
+g_raid3_regular_release(struct g_raid3_softc *sc)
+{
+ struct bio *bp, *bp2;
+
+ TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
+ if (g_raid3_sync_collision(sc, bp))
+ continue;
+ bioq_remove(&sc->sc_regular_delayed, bp);
+ G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
+ mtx_lock(&sc->sc_queue_mtx);
+ bioq_insert_head(&sc->sc_queue, bp);
+#if 0
+ /*
+ * wakeup() is not needed, because this function is called from
+ * the worker thread.
+ */
+ wakeup(&sc->sc_queue);
+#endif
+ mtx_unlock(&sc->sc_queue_mtx);
+ }
+}
+
+/*
+ * Releases delayed sync requests which don't collide anymore with regular
+ * requests.
+ */
+static void
+g_raid3_sync_release(struct g_raid3_softc *sc)
+{
+ struct bio *bp, *bp2;
+
+ TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
+ if (g_raid3_regular_collision(sc, bp))
+ continue;
+ bioq_remove(&sc->sc_sync_delayed, bp);
+ G_RAID3_LOGREQ(2, bp,
+ "Releasing delayed synchronization request.");
+ g_io_request(bp, bp->bio_from);
+ }
}
+/*
+ * Handle synchronization requests.
+ * Every synchronization request is two-steps process: first, READ request is
+ * send to active provider and then WRITE request (with read data) to the provider
+ * beeing synchronized. When WRITE is finished, new synchronization request is
+ * send.
+ */
static void
g_raid3_sync_request(struct bio *bp)
{
@@ -1412,10 +1516,13 @@ g_raid3_sync_request(struct bio *bp)
sc = bp->bio_from->geom->softc;
disk = bp->bio_from->private;
if (disk == NULL) {
+ sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
g_topology_lock();
g_raid3_kill_consumer(sc, bp->bio_from);
g_topology_unlock();
+ free(bp->bio_data, M_RAID3);
g_destroy_bio(bp);
+ sx_xlock(&sc->sc_lock);
return;
}
@@ -1464,13 +1571,15 @@ g_raid3_sync_request(struct bio *bp)
dst += atom;
}
}
+ bp->bio_driver1 = bp->bio_driver2 = NULL;
+ bp->bio_pflags = 0;
bp->bio_offset /= sc->sc_ndisks - 1;
bp->bio_length /= sc->sc_ndisks - 1;
bp->bio_cmd = BIO_WRITE;
bp->bio_cflags = 0;
bp->bio_children = bp->bio_inbed = 0;
cp = disk->d_consumer;
- KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
+ KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
cp->acr, cp->acw, cp->ace));
cp->index++;
@@ -1480,6 +1589,9 @@ g_raid3_sync_request(struct bio *bp)
case BIO_WRITE:
{
struct g_raid3_disk_sync *sync;
+ off_t boffset, moffset;
+ void *data;
+ int i;
if (bp->bio_error != 0) {
G_RAID3_LOGREQ(0, bp,
@@ -1494,26 +1606,69 @@ g_raid3_sync_request(struct bio *bp)
}
G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
sync = &disk->d_sync;
- sync->ds_offset_done = bp->bio_offset + bp->bio_length;
- g_destroy_bio(bp);
- if (sync->ds_resync != -1)
- return;
- if (sync->ds_offset_done ==
- sc->sc_mediasize / (sc->sc_ndisks - 1)) {
+ if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) ||
+ sync->ds_consumer == NULL ||
+ (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
+ /* Don't send more synchronization requests. */
+ sync->ds_inflight--;
+ if (sync->ds_bios != NULL) {
+ i = (int)bp->bio_caller1;
+ sync->ds_bios[i] = NULL;
+ }
+ free(bp->bio_data, M_RAID3);
+ g_destroy_bio(bp);
+ if (sync->ds_inflight > 0)
+ return;
+ if (sync->ds_consumer == NULL ||
+ (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
+ return;
+ }
/*
* Disk up-to-date, activate it.
*/
g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE,
G_RAID3_EVENT_DONTWAIT);
return;
- } else if (sync->ds_offset_done % (MAXPHYS * 100) == 0) {
- /*
- * Update offset_done on every 100 blocks.
- * XXX: This should be configurable.
- */
- g_topology_lock();
+ }
+
+ /* Send next synchronization request. */
+ data = bp->bio_data;
+ bzero(bp, sizeof(*bp));
+ bp->bio_cmd = BIO_READ;
+ bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1);
+ bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
+ sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
+ bp->bio_done = g_raid3_sync_done;
+ bp->bio_data = data;
+ bp->bio_from = sync->ds_consumer;
+ bp->bio_to = sc->sc_provider;
+ G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
+ sync->ds_consumer->index++;
+ /*
+ * Delay the request if it is colliding with a regular request.
+ */
+ if (g_raid3_regular_collision(sc, bp))
+ g_raid3_sync_delay(sc, bp);
+ else
+ g_io_request(bp, sync->ds_consumer);
+
+ /* Release delayed requests if possible. */
+ g_raid3_regular_release(sc);
+
+ /* Find the smallest offset. */
+ moffset = sc->sc_mediasize;
+ for (i = 0; i < g_raid3_syncreqs; i++) {
+ bp = sync->ds_bios[i];
+ boffset = bp->bio_offset;
+ if (bp->bio_cmd == BIO_WRITE)
+ boffset *= sc->sc_ndisks - 1;
+ if (boffset < moffset)
+ moffset = boffset;
+ }
+ if (sync->ds_offset_done + (MAXPHYS * 100) < moffset) {
+ /* Update offset_done on every 100 blocks. */
+ sync->ds_offset_done = moffset;
g_raid3_update_metadata(disk);
- g_topology_unlock();
}
return;
}
@@ -1569,8 +1724,14 @@ g_raid3_register_request(struct bio *pbp)
break;
case BIO_WRITE:
case BIO_DELETE:
- {
- struct g_raid3_disk_sync *sync;
+ /*
+ * Delay the request if it is colliding with a synchronization
+ * request.
+ */
+ if (g_raid3_sync_collision(sc, pbp)) {
+ g_raid3_regular_delay(sc, pbp);
+ return (0);
+ }
if (sc->sc_idle)
g_raid3_unidle(sc);
@@ -1578,19 +1739,7 @@ g_raid3_register_request(struct bio *pbp)
sc->sc_last_write = time_uptime;
ndisks = sc->sc_ndisks;
-
- if (sc->sc_syncdisk == NULL)
- break;
- sync = &sc->sc_syncdisk->d_sync;
- if (offset >= sync->ds_offset)
- break;
- if (offset + length <= sync->ds_offset_done)
- break;
- if (offset >= sync->ds_resync && sync->ds_resync != -1)
- break;
- sync->ds_resync = offset - (offset % MAXPHYS);
break;
- }
}
for (n = 0; n < ndisks; n++) {
disk = &sc->sc_disks[n];
@@ -1687,7 +1836,7 @@ g_raid3_register_request(struct bio *pbp)
cp = disk->d_consumer;
cbp->bio_to = cp->provider;
G_RAID3_LOGREQ(3, cbp, "Sending request.");
- KASSERT(cp->acr == 1 && cp->acw == 1 && cp->ace == 1,
+ KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
("Consumer %s not opened (r%dw%de%d).",
cp->provider->name, cp->acr, cp->acw, cp->ace));
cp->index++;
@@ -1697,6 +1846,12 @@ g_raid3_register_request(struct bio *pbp)
case BIO_WRITE:
case BIO_DELETE:
/*
+ * Put request onto inflight queue, so we can check if new
+ * synchronization requests don't collide with it.
+ */
+ bioq_insert_tail(&sc->sc_inflight, pbp);
+
+ /*
* Bump syncid on first write.
*/
if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) {
@@ -1737,6 +1892,9 @@ static int
g_raid3_try_destroy(struct g_raid3_softc *sc)
{
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
if (sc->sc_rootmount != NULL) {
G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
sc->sc_rootmount);
@@ -1753,11 +1911,13 @@ g_raid3_try_destroy(struct g_raid3_softc *sc)
g_topology_unlock();
G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
&sc->sc_worker);
+ /* Unlock sc_lock here, as it can be destroyed after wakeup. */
+ sx_xunlock(&sc->sc_lock);
wakeup(&sc->sc_worker);
sc->sc_worker = NULL;
} else {
- g_raid3_destroy_device(sc);
g_topology_unlock();
+ g_raid3_destroy_device(sc);
free(sc->sc_disks, M_RAID3);
free(sc, M_RAID3);
}
@@ -1771,11 +1931,8 @@ static void
g_raid3_worker(void *arg)
{
struct g_raid3_softc *sc;
- struct g_raid3_disk *disk;
- struct g_raid3_disk_sync *sync;
struct g_raid3_event *ep;
struct bio *bp;
- u_int nreqs;
int timeout;
sc = arg;
@@ -1783,7 +1940,7 @@ g_raid3_worker(void *arg)
sched_prio(curthread, PRIBIO);
mtx_unlock_spin(&sched_lock);
- nreqs = 0;
+ sx_xlock(&sc->sc_lock);
for (;;) {
G_RAID3_DEBUG(5, "%s: Let's see...", __func__);
/*
@@ -1791,7 +1948,7 @@ g_raid3_worker(void *arg)
* This is important to handle events before any I/O requests.
*/
ep = g_raid3_event_get(sc);
- if (ep != NULL && g_topology_try_lock()) {
+ if (ep != NULL) {
g_raid3_event_remove(sc, ep);
if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) {
/* Update only device status. */
@@ -1809,7 +1966,6 @@ g_raid3_worker(void *arg)
if (ep->e_error == 0)
g_raid3_update_device(sc, 0);
}
- g_topology_unlock();
if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) {
KASSERT(ep->e_error == 0,
("Error cannot be handled."));
@@ -1824,8 +1980,11 @@ g_raid3_worker(void *arg)
}
if ((sc->sc_flags &
G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
- if (g_raid3_try_destroy(sc))
+ if (g_raid3_try_destroy(sc)) {
+ curthread->td_pflags &= ~TDP_GEOM;
+ G_RAID3_DEBUG(1, "Thread exiting.");
kthread_exit(0);
+ }
}
G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__);
continue;
@@ -1834,7 +1993,7 @@ g_raid3_worker(void *arg)
* Check if we can mark array as CLEAN and if we can't take
* how much seconds should we wait.
*/
- timeout = g_raid3_idle(sc, 0);
+ timeout = g_raid3_idle(sc, -1);
/*
* Now I/O requests.
*/
@@ -1844,8 +2003,8 @@ g_raid3_worker(void *arg)
if (bp == NULL) {
if (ep != NULL) {
/*
- * No I/O requests and topology lock was
- * already held? Try again.
+ * We have a pending even, try to serve it
+ * again.
*/
mtx_unlock(&sc->sc_queue_mtx);
tsleep(ep, PRIBIO, "r3:top1", hz / 5);
@@ -1854,78 +2013,31 @@ g_raid3_worker(void *arg)
if ((sc->sc_flags &
G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
mtx_unlock(&sc->sc_queue_mtx);
- if (g_raid3_try_destroy(sc))
+ if (g_raid3_try_destroy(sc)) {
+ curthread->td_pflags &= ~TDP_GEOM;
+ G_RAID3_DEBUG(0, "Thread exiting.");
kthread_exit(0);
- mtx_lock(&sc->sc_queue_mtx);
- }
- }
- if (sc->sc_syncdisk != NULL &&
- (bp == NULL || nreqs > g_raid3_reqs_per_sync)) {
- mtx_unlock(&sc->sc_queue_mtx);
- /*
- * It is time for synchronization...
- */
- nreqs = 0;
- disk = sc->sc_syncdisk;
- sync = &disk->d_sync;
- if (sync->ds_offset <
- sc->sc_mediasize / (sc->sc_ndisks - 1) &&
- sync->ds_offset == sync->ds_offset_done) {
- if (sync->ds_resync != -1) {
- sync->ds_offset = sync->ds_resync;
- sync->ds_offset_done = sync->ds_resync;
- sync->ds_resync = -1;
}
- g_raid3_sync_one(sc);
+ mtx_lock(&sc->sc_queue_mtx);
}
- G_RAID3_DEBUG(5, "%s: I'm here 2.", __func__);
- goto sleep;
- }
- if (bp == NULL) {
+ sx_xunlock(&sc->sc_lock);
MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1",
timeout * hz);
+ sx_xlock(&sc->sc_lock);
G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__);
continue;
}
- nreqs++;
bioq_remove(&sc->sc_queue, bp);
mtx_unlock(&sc->sc_queue_mtx);
- if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) {
+ if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0)
g_raid3_regular_request(bp);
- } else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) {
- u_int timeout, sps;
-
+ else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0)
g_raid3_sync_request(bp);
-sleep:
- sps = atomic_load_acq_int(&g_raid3_syncs_per_sec);
- if (sps == 0) {
- G_RAID3_DEBUG(5, "%s: I'm here 6.", __func__);
- continue;
- }
- if (ep != NULL) {
- /*
- * We have some pending events, don't sleep now.
- */
- G_RAID3_DEBUG(5, "%s: I'm here 7.", __func__);
- tsleep(ep, PRIBIO, "r3:top2", hz / 5);
- continue;
- }
- mtx_lock(&sc->sc_queue_mtx);
- if (bioq_first(&sc->sc_queue) != NULL) {
- mtx_unlock(&sc->sc_queue_mtx);
- G_RAID3_DEBUG(5, "%s: I'm here 8.", __func__);
- continue;
- }
- timeout = hz / sps;
- if (timeout == 0)
- timeout = 1;
- MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w2",
- timeout);
- } else {
+ else {
if (g_raid3_register_request(bp) != 0) {
mtx_lock(&sc->sc_queue_mtx);
- bioq_insert_tail(&sc->sc_queue, bp);
+ bioq_insert_head(&sc->sc_queue, bp);
MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx,
PRIBIO | PDROP, "r3:lowmem", hz / 10);
}
@@ -1938,15 +2050,15 @@ static void
g_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk)
{
- g_topology_assert();
+ sx_assert(&sc->sc_lock, SX_LOCKED);
if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) {
G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
- g_raid3_get_diskname(disk), disk->d_softc->sc_name);
+ g_raid3_get_diskname(disk), sc->sc_name);
disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
} else if (sc->sc_idle &&
(disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) {
G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
- g_raid3_get_diskname(disk), disk->d_softc->sc_name);
+ g_raid3_get_diskname(disk), sc->sc_name);
disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
}
}
@@ -1955,10 +2067,13 @@ static void
g_raid3_sync_start(struct g_raid3_softc *sc)
{
struct g_raid3_disk *disk;
+ struct g_consumer *cp;
+ struct bio *bp;
int error;
u_int n;
- g_topology_assert();
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
("Device not in DEGRADED state (%s, %u).", sc->sc_name,
@@ -1975,23 +2090,68 @@ g_raid3_sync_start(struct g_raid3_softc *sc)
if (disk == NULL)
return;
+ sx_xunlock(&sc->sc_lock);
+ g_topology_lock();
+ cp = g_new_consumer(sc->sc_sync.ds_geom);
+ error = g_attach(cp, sc->sc_provider);
+ KASSERT(error == 0,
+ ("Cannot attach to %s (error=%d).", sc->sc_name, error));
+ error = g_access(cp, 1, 0, 0);
+ KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
+
G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
g_raid3_get_diskname(disk));
disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
KASSERT(disk->d_sync.ds_consumer == NULL,
("Sync consumer already exists (device=%s, disk=%s).",
sc->sc_name, g_raid3_get_diskname(disk)));
- disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
+
+ disk->d_sync.ds_consumer = cp;
disk->d_sync.ds_consumer->private = disk;
disk->d_sync.ds_consumer->index = 0;
- error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
- KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
- disk->d_softc->sc_name, error));
- error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
- KASSERT(error == 0, ("Cannot open %s (error=%d).",
- disk->d_softc->sc_name, error));
- disk->d_sync.ds_data = malloc(MAXPHYS, M_RAID3, M_WAITOK);
sc->sc_syncdisk = disk;
+
+ /*
+ * Allocate memory for synchronization bios and initialize them.
+ */
+ disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs,
+ M_RAID3, M_WAITOK);
+ for (n = 0; n < g_raid3_syncreqs; n++) {
+ bp = g_alloc_bio();
+ disk->d_sync.ds_bios[n] = bp;
+ bp->bio_parent = NULL;
+ bp->bio_cmd = BIO_READ;
+ bp->bio_data = malloc(MAXPHYS, M_RAID3, M_WAITOK);
+ bp->bio_cflags = 0;
+ bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1);
+ bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
+ disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
+ bp->bio_done = g_raid3_sync_done;
+ bp->bio_from = disk->d_sync.ds_consumer;
+ bp->bio_to = sc->sc_provider;
+ bp->bio_caller1 = (void *)n;
+ }
+
+ /* Set the number of in-flight synchronization requests. */
+ disk->d_sync.ds_inflight = g_raid3_syncreqs;
+
+ /*
+ * Fire off first synchronization requests.
+ */
+ for (n = 0; n < g_raid3_syncreqs; n++) {
+ bp = disk->d_sync.ds_bios[n];
+ G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
+ disk->d_sync.ds_consumer->index++;
+ /*
+ * Delay the request if it is colliding with a regular request.
+ */
+ if (g_raid3_regular_collision(sc, bp))
+ g_raid3_sync_delay(sc, bp);
+ else
+ g_io_request(bp, disk->d_sync.ds_consumer);
+ }
}
/*
@@ -2003,8 +2163,11 @@ static void
g_raid3_sync_stop(struct g_raid3_softc *sc, int type)
{
struct g_raid3_disk *disk;
+ struct g_consumer *cp;
+
+ g_topology_assert_not();
+ sx_assert(&sc->sc_lock, SX_LOCKED);
- g_topology_assert();
KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED,
("Device not in DEGRADED state (%s, %u).", sc->sc_name,
sc->sc_state));
@@ -2019,15 +2182,21 @@ g_raid3_sync_stop(struct g_raid3_softc *sc, int type)
if (type == 0) {
G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.",
- disk->d_softc->sc_name, g_raid3_get_diskname(disk));
+ sc->sc_name, g_raid3_get_diskname(disk));
} else /* if (type == 1) */ {
G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
- disk->d_softc->sc_name, g_raid3_get_diskname(disk));
+ sc->sc_name, g_raid3_get_diskname(disk));
}
- g_raid3_kill_consumer(disk->d_softc, disk->d_sync.ds_consumer);
- free(disk->d_sync.ds_data, M_RAID3);
+ free(disk->d_sync.ds_bios, M_RAID3);
+ disk->d_sync.ds_bios = NULL;
+ cp = disk->d_sync.ds_consumer;
disk->d_sync.ds_consumer = NULL;
disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
+ sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
+ g_topology_lock();
+ g_raid3_kill_consumer(sc, cp);
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
}
static void
@@ -2035,13 +2204,15 @@ g_raid3_launch_provider(struct g_raid3_softc *sc)
{
struct g_provider *pp;
- g_topology_assert();
+ sx_assert(&sc->sc_lock, SX_LOCKED);
+ g_topology_lock();
pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name);
pp->mediasize = sc->sc_mediasize;
pp->sectorsize = sc->sc_sectorsize;
sc->sc_provider = pp;
g_error_provider(pp, 0);
+ g_topology_unlock();
G_RAID3_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
pp->name);
if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED)
@@ -2053,10 +2224,11 @@ g_raid3_destroy_provider(struct g_raid3_softc *sc)
{
struct bio *bp;
- g_topology_assert();
+ g_topology_assert_not();
KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
sc->sc_name));
+ g_topology_lock();
g_error_provider(sc->sc_provider, ENXIO);
mtx_lock(&sc->sc_queue_mtx);
while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
@@ -2068,6 +2240,7 @@ g_raid3_destroy_provider(struct g_raid3_softc *sc)
sc->sc_provider->name);
sc->sc_provider->flags |= G_PF_WITHER;
g_orphan_provider(sc->sc_provider, ENXIO);
+ g_topology_unlock();
sc->sc_provider = NULL;
if (sc->sc_syncdisk != NULL)
g_raid3_sync_stop(sc, 1);
@@ -2158,7 +2331,7 @@ g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force)
struct g_raid3_disk *disk;
u_int state;
- g_topology_assert();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
switch (sc->sc_state) {
case G_RAID3_DEVICE_STATE_STARTING:
@@ -2386,9 +2559,9 @@ g_raid3_update_disk(struct g_raid3_disk *disk, u_int state)
{
struct g_raid3_softc *sc;
- g_topology_assert();
-
sc = disk->d_softc;
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
again:
G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.",
g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state),
@@ -2696,7 +2869,7 @@ g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp,
struct g_raid3_disk *disk;
int error;
- g_topology_assert();
+ g_topology_assert_not();
G_RAID3_DEBUG(2, "Adding disk %s.", pp->name);
error = g_raid3_check_metadata(sc, pp, md);
@@ -2727,7 +2900,7 @@ static int
g_raid3_access(struct g_provider *pp, int acr, int acw, int ace)
{
struct g_raid3_softc *sc;
- int dcr, dcw, dce;
+ int dcr, dcw, dce, error;
g_topology_assert();
G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
@@ -2737,18 +2910,30 @@ g_raid3_access(struct g_provider *pp, int acr, int acw, int ace)
dcw = pp->acw + acw;
dce = pp->ace + ace;
+ error = 0;
sc = pp->geom->softc;
+ if (sc != NULL) {
+ if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0)
+ sc = NULL;
+ else {
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
+ }
+ }
if (sc == NULL ||
- g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1 ||
- (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
- if (acr <= 0 && acw <= 0 && ace <= 0)
- return (0);
- else
- return (ENXIO);
+ g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) {
+ if (acr > 0 || acw > 0 || ace > 0)
+ error = ENXIO;
+ goto end;
}
if (dcw == 0 && !sc->sc_idle)
- g_raid3_idle(sc, 1);
- return (0);
+ g_raid3_idle(sc, dcw);
+end:
+ if (sc != NULL) {
+ sx_xunlock(&sc->sc_lock);
+ g_topology_lock();
+ }
+ return (error);
}
static struct g_geom *
@@ -2792,8 +2977,12 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
sc->sc_disks[n].d_no = n;
sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK;
}
+ sx_init(&sc->sc_lock, "graid3:lock");
bioq_init(&sc->sc_queue);
mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF);
+ bioq_init(&sc->sc_regular_delayed);
+ bioq_init(&sc->sc_inflight);
+ bioq_init(&sc->sc_sync_delayed);
TAILQ_INIT(&sc->sc_events);
mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF);
callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
@@ -2808,26 +2997,38 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md)
gp->softc = sc;
gp->orphan = g_raid3_orphan;
sc->sc_sync.ds_geom = gp;
- sc->sc_zone_64k = uma_zcreate("gr3:64k", 65536, NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR, 0);
- uma_zone_set_max(sc->sc_zone_64k, g_raid3_n64k);
- sc->sc_zone_16k = uma_zcreate("gr3:16k", 16384, NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR, 0);
- uma_zone_set_max(sc->sc_zone_16k, g_raid3_n16k);
- sc->sc_zone_4k = uma_zcreate("gr3:4k", 4096, NULL, NULL, NULL, NULL,
- UMA_ALIGN_PTR, 0);
- uma_zone_set_max(sc->sc_zone_4k, g_raid3_n4k);
+
+ sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k", 65536,
+ g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
+ sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0;
+ sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k;
+ sc->sc_zones[G_RAID3_ZONE_64K].sz_requested =
+ sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0;
+ sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k", 16384,
+ g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
+ sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0;
+ sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k;
+ sc->sc_zones[G_RAID3_ZONE_16K].sz_requested =
+ sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0;
+ sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k", 4096,
+ g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
+ sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0;
+ sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k;
+ sc->sc_zones[G_RAID3_ZONE_4K].sz_requested =
+ sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0;
+
error = kthread_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0,
"g_raid3 %s", md->md_name);
if (error != 0) {
G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.",
sc->sc_name);
- uma_zdestroy(sc->sc_zone_64k);
- uma_zdestroy(sc->sc_zone_16k);
- uma_zdestroy(sc->sc_zone_4k);
+ uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone);
+ uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone);
+ uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone);
g_destroy_geom(sc->sc_sync.ds_geom);
mtx_destroy(&sc->sc_events_mtx);
mtx_destroy(&sc->sc_queue_mtx);
+ sx_destroy(&sc->sc_lock);
g_destroy_geom(sc->sc_geom);
free(sc->sc_disks, M_RAID3);
free(sc, M_RAID3);
@@ -2852,10 +3053,11 @@ g_raid3_destroy(struct g_raid3_softc *sc, boolean_t force)
{
struct g_provider *pp;
- g_topology_assert();
-
+ g_topology_assert_not();
if (sc == NULL)
return (ENXIO);
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
+
pp = sc->sc_provider;
if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
if (force) {
@@ -2871,8 +3073,8 @@ g_raid3_destroy(struct g_raid3_softc *sc, boolean_t force)
sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY;
sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT;
- g_topology_unlock();
G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
+ sx_xunlock(&sc->sc_lock);
mtx_lock(&sc->sc_queue_mtx);
wakeup(sc);
wakeup(&sc->sc_queue);
@@ -2881,7 +3083,7 @@ g_raid3_destroy(struct g_raid3_softc *sc, boolean_t force)
while (sc->sc_worker != NULL)
tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5);
G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
- g_topology_lock();
+ sx_xlock(&sc->sc_lock);
g_raid3_destroy_device(sc);
free(sc->sc_disks, M_RAID3);
free(sc, M_RAID3);
@@ -2958,6 +3160,8 @@ g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
sc = gp->softc;
}
G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
error = g_raid3_add_disk(sc, pp, &md);
if (error != 0) {
G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
@@ -2965,9 +3169,13 @@ g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) ==
sc->sc_ndisks) {
g_raid3_destroy(sc, 1);
+ g_topology_lock();
+ return (NULL);
}
- return (NULL);
+ gp = NULL;
}
+ sx_xunlock(&sc->sc_lock);
+ g_topology_lock();
return (gp);
}
@@ -2975,8 +3183,17 @@ static int
g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused,
struct g_geom *gp)
{
+ struct g_raid3_softc *sc;
+ int error;
- return (g_raid3_destroy(gp->softc, 0));
+ g_topology_unlock();
+ sc = gp->softc;
+ sx_xlock(&sc->sc_lock);
+ error = g_raid3_destroy(gp->softc, 0);
+ if (error != 0)
+ sx_xunlock(&sc->sc_lock);
+ g_topology_lock();
+ return (error);
}
static void
@@ -3001,6 +3218,8 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
disk = cp->private;
if (disk == NULL)
return;
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
sbuf_printf(sb, "%s<Type>", indent);
if (disk->d_no == sc->sc_ndisks - 1)
sbuf_printf(sb, "PARITY");
@@ -3011,11 +3230,11 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
(u_int)disk->d_no);
if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
sbuf_printf(sb, "%s<Synchronized>", indent);
- if (disk->d_sync.ds_offset_done == 0)
+ if (disk->d_sync.ds_offset == 0)
sbuf_printf(sb, "0%%");
else {
sbuf_printf(sb, "%u%%",
- (u_int)((disk->d_sync.ds_offset_done * 100) /
+ (u_int)((disk->d_sync.ds_offset * 100) /
(sc->sc_mediasize / (sc->sc_ndisks - 1))));
}
sbuf_printf(sb, "</Synchronized>\n");
@@ -3049,7 +3268,23 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
sbuf_printf(sb, "</Flags>\n");
sbuf_printf(sb, "%s<State>%s</State>\n", indent,
g_raid3_disk_state2str(disk->d_state));
+ sx_xunlock(&sc->sc_lock);
+ g_topology_lock();
} else {
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
+ sbuf_printf(sb, "%s<Zone4kRequested>%u</Zone4kRequested>\n",
+ indent, sc->sc_zones[G_RAID3_ZONE_4K].sz_requested);
+ sbuf_printf(sb, "%s<Zone4kFailed>%u</Zone4kFailed>\n",
+ indent, sc->sc_zones[G_RAID3_ZONE_4K].sz_failed);
+ sbuf_printf(sb, "%s<Zone16kRequested>%u</Zone16kRequested>\n",
+ indent, sc->sc_zones[G_RAID3_ZONE_16K].sz_requested);
+ sbuf_printf(sb, "%s<Zone16kFailed>%u</Zone16kFailed>\n",
+ indent, sc->sc_zones[G_RAID3_ZONE_16K].sz_failed);
+ sbuf_printf(sb, "%s<Zone64kRequested>%u</Zone64kRequested>\n",
+ indent, sc->sc_zones[G_RAID3_ZONE_64K].sz_requested);
+ sbuf_printf(sb, "%s<Zone64kFailed>%u</Zone64kFailed>\n",
+ indent, sc->sc_zones[G_RAID3_ZONE_64K].sz_failed);
sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
@@ -3079,22 +3314,52 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
sc->sc_ndisks);
sbuf_printf(sb, "%s<State>%s</State>\n", indent,
g_raid3_device_state2str(sc->sc_state));
+ sx_xunlock(&sc->sc_lock);
+ g_topology_lock();
+ }
+}
+
+static void
+g_raid3_shutdown_pre_sync(void *arg, int howto)
+{
+ struct g_class *mp;
+ struct g_geom *gp, *gp2;
+ struct g_raid3_softc *sc;
+
+ mp = arg;
+ DROP_GIANT();
+ g_topology_lock();
+ LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
+ if ((sc = gp->softc) == NULL)
+ continue;
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
+ if (sc->sc_syncdisk != NULL)
+ g_raid3_sync_stop(sc, 1);
+ sx_xunlock(&sc->sc_lock);
+ g_topology_lock();
}
+ g_topology_unlock();
+ PICKUP_GIANT();
}
static void
-g_raid3_shutdown(void *arg, int howto)
+g_raid3_shutdown_post_sync(void *arg, int howto)
{
struct g_class *mp;
struct g_geom *gp, *gp2;
+ struct g_raid3_softc *sc;
mp = arg;
DROP_GIANT();
g_topology_lock();
LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
- if (gp->softc == NULL)
+ if ((sc = gp->softc) == NULL)
continue;
- g_raid3_destroy(gp->softc, 1);
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
+ g_raid3_destroy(sc, 1);
+ g_topology_lock();
}
g_topology_unlock();
PICKUP_GIANT();
@@ -3107,9 +3372,11 @@ static void
g_raid3_init(struct g_class *mp)
{
- g_raid3_ehtag = EVENTHANDLER_REGISTER(shutdown_post_sync,
- g_raid3_shutdown, mp, SHUTDOWN_PRI_FIRST);
- if (g_raid3_ehtag == NULL)
+ g_raid3_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
+ g_raid3_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
+ g_raid3_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
+ g_raid3_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
+ if (g_raid3_pre_sync == NULL || g_raid3_post_sync == NULL)
G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event.");
}
@@ -3117,9 +3384,10 @@ static void
g_raid3_fini(struct g_class *mp)
{
- if (g_raid3_ehtag == NULL)
- return;
- EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_ehtag);
+ if (g_raid3_pre_sync != NULL)
+ EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_raid3_pre_sync);
+ if (g_raid3_post_sync != NULL)
+ EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_post_sync);
}
DECLARE_GEOM_CLASS(g_raid3_class, g_raid3);
diff --git a/sys/geom/raid3/g_raid3.h b/sys/geom/raid3/g_raid3.h
index ab97593..661e66b 100644
--- a/sys/geom/raid3/g_raid3.h
+++ b/sys/geom/raid3/g_raid3.h
@@ -110,12 +110,13 @@ extern u_int g_raid3_debug;
*/
struct g_raid3_disk_sync {
struct g_consumer *ds_consumer; /* Consumer connected to our device. */
- off_t ds_offset; /* Offset of next request to send. */
- off_t ds_offset_done; /* Offset of already synchronized
+ off_t ds_offset; /* Offset of next request to send. */
+ off_t ds_offset_done; /* Offset of already synchronized
region. */
- off_t ds_resync; /* Resynchronize from this offset. */
- u_int ds_syncid; /* Disk's synchronization ID. */
- u_char *ds_data;
+ off_t ds_resync; /* Resynchronize from this offset. */
+ u_int ds_syncid; /* Disk's synchronization ID. */
+ u_int ds_inflight; /* Number of in-flight sync requests. */
+ struct bio **ds_bios; /* BIOs for synchronization I/O. */
};
/*
@@ -169,6 +170,23 @@ struct g_raid3_event {
/* Bump genid immediately. */
#define G_RAID3_BUMP_GENID 0x2
+enum g_raid3_zones {
+ G_RAID3_ZONE_64K,
+ G_RAID3_ZONE_16K,
+ G_RAID3_ZONE_4K,
+ G_RAID3_NUM_ZONES
+};
+
+static __inline enum g_raid3_zones
+g_raid3_zone(size_t nbytes) {
+ if (nbytes > 16384)
+ return (G_RAID3_ZONE_64K);
+ else if (nbytes > 4096)
+ return (G_RAID3_ZONE_16K);
+ else
+ return (G_RAID3_ZONE_4K);
+};
+
struct g_raid3_softc {
u_int sc_state; /* Device state. */
uint64_t sc_mediasize; /* Device size. */
@@ -180,18 +198,31 @@ struct g_raid3_softc {
uint32_t sc_id; /* Device unique ID. */
+ struct sx sc_lock;
struct bio_queue_head sc_queue;
struct mtx sc_queue_mtx;
struct proc *sc_worker;
+ struct bio_queue_head sc_regular_delayed; /* Delayed I/O requests due
+ collision with sync
+ requests. */
+ struct bio_queue_head sc_inflight; /* In-flight regular write
+ requests. */
+ struct bio_queue_head sc_sync_delayed; /* Delayed sync requests due
+ collision with regular
+ requests. */
struct g_raid3_disk *sc_disks;
u_int sc_ndisks; /* Number of disks. */
u_int sc_round_robin;
struct g_raid3_disk *sc_syncdisk;
- uma_zone_t sc_zone_64k;
- uma_zone_t sc_zone_16k;
- uma_zone_t sc_zone_4k;
+ struct g_raid3_zone {
+ uma_zone_t sz_zone;
+ size_t sz_inuse;
+ size_t sz_max;
+ u_int sz_requested;
+ u_int sz_failed;
+ } sc_zones[G_RAID3_NUM_ZONES];
u_int sc_genid; /* Generation ID. */
u_int sc_syncid; /* Synchronization ID. */
diff --git a/sys/geom/raid3/g_raid3_ctl.c b/sys/geom/raid3/g_raid3_ctl.c
index b83b933..eb138d7 100644
--- a/sys/geom/raid3/g_raid3_ctl.c
+++ b/sys/geom/raid3/g_raid3_ctl.c
@@ -51,7 +51,7 @@ g_raid3_find_device(struct g_class *mp, const char *name)
struct g_raid3_softc *sc;
struct g_geom *gp;
- g_topology_assert();
+ g_topology_lock();
LIST_FOREACH(gp, &mp->geom, geom) {
sc = gp->softc;
if (sc == NULL)
@@ -60,9 +60,12 @@ g_raid3_find_device(struct g_class *mp, const char *name)
continue;
if (strcmp(gp->name, name) == 0 ||
strcmp(sc->sc_name, name) == 0) {
+ g_topology_unlock();
+ sx_xlock(&sc->sc_lock);
return (sc);
}
}
+ g_topology_unlock();
return (NULL);
}
@@ -72,7 +75,7 @@ g_raid3_find_disk(struct g_raid3_softc *sc, const char *name)
struct g_raid3_disk *disk;
u_int n;
- g_topology_assert();
+ sx_assert(&sc->sc_lock, SX_XLOCKED);
for (n = 0; n < sc->sc_ndisks; n++) {
disk = &sc->sc_disks[n];
if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
@@ -109,20 +112,6 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Invalid number of arguments.");
return;
}
- name = gctl_get_asciiparam(req, "arg0");
- if (name == NULL) {
- gctl_error(req, "No 'arg%u' argument.", 0);
- return;
- }
- sc = g_raid3_find_device(mp, name);
- if (sc == NULL) {
- gctl_error(req, "No such device: %s.", name);
- return;
- }
- if (g_raid3_ndisks(sc, -1) < sc->sc_ndisks) {
- gctl_error(req, "Not all disks connected.");
- return;
- }
autosync = gctl_get_paraml(req, "autosync", sizeof(*autosync));
if (autosync == NULL) {
gctl_error(req, "No '%s' argument.", "autosync");
@@ -174,6 +163,21 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Nothing has changed.");
return;
}
+ name = gctl_get_asciiparam(req, "arg0");
+ if (name == NULL) {
+ gctl_error(req, "No 'arg%u' argument.", 0);
+ return;
+ }
+ sc = g_raid3_find_device(mp, name);
+ if (sc == NULL) {
+ gctl_error(req, "No such device: %s.", name);
+ return;
+ }
+ if (g_raid3_ndisks(sc, -1) < sc->sc_ndisks) {
+ gctl_error(req, "Not all disks connected.");
+ sx_xunlock(&sc->sc_lock);
+ return;
+ }
if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) {
if (*autosync) {
sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_NOAUTOSYNC;
@@ -223,6 +227,7 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp)
}
}
}
+ sx_xunlock(&sc->sc_lock);
}
static void
@@ -235,7 +240,6 @@ g_raid3_ctl_rebuild(struct gctl_req *req, struct g_class *mp)
const char *name;
int error, *nargs;
- g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@@ -258,16 +262,19 @@ g_raid3_ctl_rebuild(struct gctl_req *req, struct g_class *mp)
name = gctl_get_asciiparam(req, "arg1");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 1);
+ sx_xunlock(&sc->sc_lock);
return;
}
disk = g_raid3_find_disk(sc, name);
if (disk == NULL) {
gctl_error(req, "No such provider: %s.", name);
+ sx_xunlock(&sc->sc_lock);
return;
}
if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE &&
g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks) {
gctl_error(req, "There is one stale disk already.", name);
+ sx_xunlock(&sc->sc_lock);
return;
}
/*
@@ -279,18 +286,20 @@ g_raid3_ctl_rebuild(struct gctl_req *req, struct g_class *mp)
disk->d_flags |= G_RAID3_DISK_FLAG_FORCE_SYNC;
g_raid3_update_metadata(disk);
pp = disk->d_consumer->provider;
+ g_topology_lock();
error = g_raid3_read_metadata(disk->d_consumer, &md);
+ g_topology_unlock();
g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
G_RAID3_EVENT_WAIT);
if (error != 0) {
gctl_error(req, "Cannot read metadata from %s.", pp->name);
+ sx_xunlock(&sc->sc_lock);
return;
}
error = g_raid3_add_disk(sc, pp, &md);
- if (error != 0) {
+ if (error != 0)
gctl_error(req, "Cannot reconnect component %s.", pp->name);
- return;
- }
+ sx_xunlock(&sc->sc_lock);
}
static void
@@ -302,8 +311,6 @@ g_raid3_ctl_stop(struct gctl_req *req, struct g_class *mp)
char param[16];
u_int i;
- g_topology_assert();
-
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@@ -335,8 +342,10 @@ g_raid3_ctl_stop(struct gctl_req *req, struct g_class *mp)
if (error != 0) {
gctl_error(req, "Cannot destroy device %s (error=%d).",
sc->sc_geom->name, error);
+ sx_xunlock(&sc->sc_lock);
return;
}
+ /* No need to unlock, because lock is already dead. */
}
}
@@ -363,7 +372,6 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
intmax_t *no;
int *hardcode, *nargs, error;
- g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@@ -373,14 +381,14 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Invalid number of arguments.");
return;
}
- name = gctl_get_asciiparam(req, "arg0");
- if (name == NULL) {
- gctl_error(req, "No 'arg%u' argument.", 0);
+ hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
+ if (hardcode == NULL) {
+ gctl_error(req, "No '%s' argument.", "hardcode");
return;
}
- sc = g_raid3_find_device(mp, name);
- if (sc == NULL) {
- gctl_error(req, "No such device: %s.", name);
+ name = gctl_get_asciiparam(req, "arg1");
+ if (name == NULL) {
+ gctl_error(req, "No 'arg%u' argument.", 1);
return;
}
no = gctl_get_paraml(req, "number", sizeof(*no));
@@ -388,40 +396,62 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "No '%s' argument.", "no");
return;
}
- if (*no >= sc->sc_ndisks) {
- gctl_error(req, "Invalid component number.");
+ g_topology_lock();
+ pp = g_provider_by_name(name);
+ if (pp == NULL) {
+ g_topology_unlock();
+ gctl_error(req, "Invalid provider.");
return;
}
- hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode));
- if (hardcode == NULL) {
- gctl_error(req, "No '%s' argument.", "hardcode");
- return;
+ gp = g_new_geomf(mp, "raid3:insert");
+ gp->orphan = g_raid3_ctl_insert_orphan;
+ cp = g_new_consumer(gp);
+ error = g_attach(cp, pp);
+ if (error != 0) {
+ g_topology_unlock();
+ gctl_error(req, "Cannot attach to %s.", pp->name);
+ goto end;
}
- disk = &sc->sc_disks[*no];
- if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
- gctl_error(req, "Component %u is already connected.", *no);
- return;
+ error = g_access(cp, 0, 1, 1);
+ if (error != 0) {
+ g_topology_unlock();
+ gctl_error(req, "Cannot access %s.", pp->name);
+ goto end;
}
- name = gctl_get_asciiparam(req, "arg1");
+ g_topology_unlock();
+ name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
- gctl_error(req, "No 'arg%u' argument.", 1);
- return;
+ gctl_error(req, "No 'arg%u' argument.", 0);
+ goto end;
}
- pp = g_provider_by_name(name);
- if (pp == NULL) {
- gctl_error(req, "Invalid provider.");
- return;
+ sc = g_raid3_find_device(mp, name);
+ if (sc == NULL) {
+ gctl_error(req, "No such device: %s.", name);
+ goto end;
+ }
+ if (*no >= sc->sc_ndisks) {
+ sx_xunlock(&sc->sc_lock);
+ gctl_error(req, "Invalid component number.");
+ goto end;
+ }
+ disk = &sc->sc_disks[*no];
+ if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
+ sx_xunlock(&sc->sc_lock);
+ gctl_error(req, "Component %u is already connected.", *no);
+ goto end;
}
if (((sc->sc_sectorsize / (sc->sc_ndisks - 1)) % pp->sectorsize) != 0) {
+ sx_xunlock(&sc->sc_lock);
gctl_error(req,
"Cannot insert provider %s, because of its sector size.",
pp->name);
- return;
+ goto end;
}
compsize = sc->sc_mediasize / (sc->sc_ndisks - 1);
if (compsize > pp->mediasize - pp->sectorsize) {
+ sx_xunlock(&sc->sc_lock);
gctl_error(req, "Provider %s too small.", pp->name);
- return;
+ goto end;
}
if (compsize < pp->mediasize - pp->sectorsize) {
gctl_error(req,
@@ -429,20 +459,8 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
pp->name, (intmax_t)compsize,
(intmax_t)(pp->mediasize - pp->sectorsize));
}
- gp = g_new_geomf(mp, "raid3:insert");
- gp->orphan = g_raid3_ctl_insert_orphan;
- cp = g_new_consumer(gp);
- error = g_attach(cp, pp);
- if (error != 0) {
- gctl_error(req, "Cannot attach to %s.", pp->name);
- goto end;
- }
- error = g_access(cp, 0, 1, 1);
- if (error != 0) {
- gctl_error(req, "Cannot access %s.", pp->name);
- goto end;
- }
g_raid3_fill_metadata(disk, &md);
+ sx_xunlock(&sc->sc_lock);
md.md_syncid = 0;
md.md_dflags = 0;
if (*hardcode)
@@ -452,20 +470,20 @@ g_raid3_ctl_insert(struct gctl_req *req, struct g_class *mp)
md.md_provsize = pp->mediasize;
sector = g_malloc(pp->sectorsize, M_WAITOK);
raid3_metadata_encode(&md, sector);
- g_topology_unlock();
error = g_write_data(cp, pp->mediasize - pp->sectorsize, sector,
pp->sectorsize);
- g_topology_lock();
g_free(sector);
if (error != 0)
gctl_error(req, "Cannot store metadata on %s.", pp->name);
end:
+ g_topology_lock();
if (cp->acw > 0)
g_access(cp, 0, -1, -1);
if (cp->provider != NULL)
g_detach(cp);
g_destroy_consumer(cp);
g_destroy_geom(gp);
+ g_topology_unlock();
}
static void
@@ -477,7 +495,6 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
intmax_t *no;
int *nargs;
- g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
@@ -487,6 +504,11 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "Invalid number of arguments.");
return;
}
+ no = gctl_get_paraml(req, "number", sizeof(*no));
+ if (no == NULL) {
+ gctl_error(req, "No '%s' argument.", "no");
+ return;
+ }
name = gctl_get_asciiparam(req, "arg0");
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", 0);
@@ -497,12 +519,8 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
gctl_error(req, "No such device: %s.", name);
return;
}
- no = gctl_get_paraml(req, "number", sizeof(*no));
- if (no == NULL) {
- gctl_error(req, "No '%s' argument.", "no");
- return;
- }
if (*no >= sc->sc_ndisks) {
+ sx_xunlock(&sc->sc_lock);
gctl_error(req, "Invalid component number.");
return;
}
@@ -517,7 +535,7 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
sc->sc_ndisks) {
gctl_error(req, "Cannot replace component number %u.",
*no);
- return;
+ break;
}
/* FALLTHROUGH */
case G_RAID3_DISK_STATE_STALE:
@@ -528,15 +546,16 @@ g_raid3_ctl_remove(struct gctl_req *req, struct g_class *mp)
} else {
g_raid3_event_send(disk,
G_RAID3_DISK_STATE_DISCONNECTED,
- G_RAID3_EVENT_WAIT);
+ G_RAID3_EVENT_DONTWAIT);
}
break;
case G_RAID3_DISK_STATE_NODISK:
break;
default:
gctl_error(req, "Cannot replace component number %u.", *no);
- return;
+ break;
}
+ sx_xunlock(&sc->sc_lock);
}
void
@@ -556,6 +575,7 @@ g_raid3_config(struct gctl_req *req, struct g_class *mp, const char *verb)
return;
}
+ g_topology_unlock();
if (strcmp(verb, "configure") == 0)
g_raid3_ctl_configure(req, mp);
else if (strcmp(verb, "insert") == 0)
@@ -568,4 +588,5 @@ g_raid3_config(struct gctl_req *req, struct g_class *mp, const char *verb)
g_raid3_ctl_stop(req, mp);
else
gctl_error(req, "Unknown verb.");
+ g_topology_lock();
}
OpenPOWER on IntegriCloud