summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/md/md.c73
-rw-r--r--drivers/md/raid1.c4
-rw-r--r--drivers/md/raid10.c5
-rw-r--r--drivers/md/raid5.c6
-rw-r--r--drivers/md/raid6main.c6
-rw-r--r--include/linux/raid/md.h2
-rw-r--r--include/linux/raid/md_k.h7
7 files changed, 82 insertions, 21 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c842e34..177d2a7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -218,6 +218,8 @@ static mddev_t * mddev_find(dev_t unit)
INIT_LIST_HEAD(&new->all_mddevs);
init_timer(&new->safemode_timer);
atomic_set(&new->active, 1);
+ bio_list_init(&new->write_list);
+ spin_lock_init(&new->write_lock);
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
@@ -1251,9 +1253,11 @@ static void md_update_sb(mddev_t * mddev)
int err, count = 100;
struct list_head *tmp;
mdk_rdev_t *rdev;
+ int sync_req;
- mddev->sb_dirty = 0;
repeat:
+ spin_lock(&mddev->write_lock);
+ sync_req = mddev->in_sync;
mddev->utime = get_seconds();
mddev->events ++;
@@ -1272,8 +1276,12 @@ repeat:
* do not write anything to disk if using
* nonpersistent superblocks
*/
- if (!mddev->persistent)
+ if (!mddev->persistent) {
+ mddev->sb_dirty = 0;
+ spin_unlock(&mddev->write_lock);
return;
+ }
+ spin_unlock(&mddev->write_lock);
dprintk(KERN_INFO
"md: updating %s RAID superblock on device (in sync %d)\n",
@@ -1304,6 +1312,15 @@ repeat:
printk(KERN_ERR \
"md: excessive errors occurred during superblock update, exiting\n");
}
+ spin_lock(&mddev->write_lock);
+ if (mddev->in_sync != sync_req) {
+ /* have to write it out again */
+ spin_unlock(&mddev->write_lock);
+ goto repeat;
+ }
+ mddev->sb_dirty = 0;
+ spin_unlock(&mddev->write_lock);
+
}
/*
@@ -3178,19 +3195,31 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
}
-void md_write_start(mddev_t *mddev)
+/* md_write_start(mddev, bi)
+ * If we need to update some array metadata (e.g. 'active' flag
+ * in superblock) before writing, queue bi for later writing
+ * and return 0, else return 1 and it will be written now
+ */
+int md_write_start(mddev_t *mddev, struct bio *bi)
{
- if (!atomic_read(&mddev->writes_pending)) {
- mddev_lock_uninterruptible(mddev);
- if (mddev->in_sync) {
- mddev->in_sync = 0;
- del_timer(&mddev->safemode_timer);
- md_update_sb(mddev);
- }
- atomic_inc(&mddev->writes_pending);
- mddev_unlock(mddev);
- } else
- atomic_inc(&mddev->writes_pending);
+ if (bio_data_dir(bi) != WRITE)
+ return 1;
+
+ atomic_inc(&mddev->writes_pending);
+ spin_lock(&mddev->write_lock);
+ if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
+ spin_unlock(&mddev->write_lock);
+ return 1;
+ }
+ bio_list_add(&mddev->write_list, bi);
+
+ if (mddev->in_sync) {
+ mddev->in_sync = 0;
+ mddev->sb_dirty = 1;
+ }
+ spin_unlock(&mddev->write_lock);
+ md_wakeup_thread(mddev->thread);
+ return 0;
}
void md_write_end(mddev_t *mddev)
@@ -3472,6 +3501,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->sb_dirty ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
+ mddev->write_list.head ||
(mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector)
@@ -3480,7 +3510,9 @@ void md_check_recovery(mddev_t *mddev)
if (mddev_trylock(mddev)==0) {
int spares =0;
+ struct bio *blist;
+ spin_lock(&mddev->write_lock);
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
!mddev->in_sync && mddev->recovery_cp == MaxSector) {
mddev->in_sync = 1;
@@ -3488,9 +3520,22 @@ void md_check_recovery(mddev_t *mddev)
}
if (mddev->safemode == 1)
mddev->safemode = 0;
+ blist = bio_list_get(&mddev->write_list);
+ spin_unlock(&mddev->write_lock);
if (mddev->sb_dirty)
md_update_sb(mddev);
+
+ while (blist) {
+ struct bio *b = blist;
+ blist = blist->bi_next;
+ b->bi_next = NULL;
+ generic_make_request(b);
+ /* we already counted this, so need to un-count */
+ md_write_end(mddev);
+ }
+
+
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
/* resync/recovery still happening */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b34ad56..3f1280b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -530,6 +530,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
+ if (md_write_start(mddev, bio)==0)
+ return 0;
spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
conf->nr_pending++;
@@ -611,7 +613,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
rcu_read_unlock();
atomic_set(&r1_bio->remaining, 1);
- md_write_start(mddev);
+
for (i = 0; i < disks; i++) {
struct bio *mbio;
if (!r1_bio->bios[i])
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9ae2150..bfc9f52 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -700,6 +700,9 @@ static int make_request(request_queue_t *q, struct bio * bio)
return 0;
}
+ if (md_write_start(mddev, bio) == 0)
+ return 0;
+
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
@@ -774,7 +777,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
rcu_read_unlock();
atomic_set(&r10_bio->remaining, 1);
- md_write_start(mddev);
+
for (i = 0; i < conf->copies; i++) {
struct bio *mbio;
int d = r10_bio->devs[i].devnum;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 63b1c59..677ce49 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1411,6 +1411,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
sector_t logical_sector, last_sector;
struct stripe_head *sh;
+ if (md_write_start(mddev, bi)==0)
+ return 0;
+
if (bio_data_dir(bi)==WRITE) {
disk_stat_inc(mddev->gendisk, writes);
disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
@@ -1423,8 +1426,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
last_sector = bi->bi_sector + (bi->bi_size>>9);
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
- if ( bio_data_dir(bi) == WRITE )
- md_write_start(mddev);
+
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 9d0e0e4..fede16c 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1570,6 +1570,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
sector_t logical_sector, last_sector;
struct stripe_head *sh;
+ if (md_write_start(mddev, bi)==0)
+ return 0;
+
if (bio_data_dir(bi)==WRITE) {
disk_stat_inc(mddev->gendisk, writes);
disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
@@ -1583,8 +1586,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
- if ( bio_data_dir(bi) == WRITE )
- md_write_start(mddev);
+
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index a6a67d1..cfde8f4 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
extern void md_unregister_thread (mdk_thread_t *thread);
extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_check_recovery(mddev_t *mddev);
-extern void md_write_start(mddev_t *mddev);
+extern int md_write_start(mddev_t *mddev, struct bio *bi);
extern void md_write_end(mddev_t *mddev);
extern void md_handle_safemode(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index c9a0d40..d92db54 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -15,6 +15,9 @@
#ifndef _MD_K_H
#define _MD_K_H
+/* and dm-bio-list.h is not under include/linux because.... ??? */
+#include "../../../drivers/md/dm-bio-list.h"
+
#define MD_RESERVED 0UL
#define LINEAR 1UL
#define RAID0 2UL
@@ -252,6 +255,10 @@ struct mddev_s
atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait;
sector_t recovery_cp;
+
+ spinlock_t write_lock;
+ struct bio_list write_list;
+
unsigned int safemode; /* if set, update "clean" superblock
* when no writes pending.
*/
OpenPOWER on IntegriCloud