summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c215
1 files changed, 174 insertions, 41 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e4..3ab80e9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
return NULL;
}
+static void requeue_list(struct btrfs_pending_bios *pending_bios,
+ struct bio *head, struct bio *tail)
+{
+
+ struct bio *old_head;
+
+ old_head = pending_bios->head;
+ pending_bios->head = head;
+ if (pending_bios->tail)
+ tail->bi_next = old_head;
+ else
+ pending_bios->tail = tail;
+}
+
/*
* we try to collect pending bios for a device so we don't get a large
* number of procs sending bios down to the same device. This greatly
@@ -141,32 +155,49 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
struct bio *pending;
struct backing_dev_info *bdi;
struct btrfs_fs_info *fs_info;
+ struct btrfs_pending_bios *pending_bios;
struct bio *tail;
struct bio *cur;
int again = 0;
- unsigned long num_run = 0;
+ unsigned long num_run;
+ unsigned long num_sync_run;
+ unsigned long batch_run = 0;
unsigned long limit;
unsigned long last_waited = 0;
+ int force_reg = 0;
bdi = blk_get_backing_dev_info(device->bdev);
fs_info = device->dev_root->fs_info;
limit = btrfs_async_submit_limit(fs_info);
limit = limit * 2 / 3;
+ /* we want to make sure that every time we switch from the sync
+ * list to the normal list, we unplug
+ */
+ num_sync_run = 0;
+
loop:
spin_lock(&device->io_lock);
loop_lock:
+ num_run = 0;
+
/* take all the bios off the list at once and process them
* later on (without the lock held). But, remember the
* tail and other pointers so the bios can be properly reinserted
* into the list if we hit congestion
*/
- pending = device->pending_bios;
- tail = device->pending_bio_tail;
+ if (!force_reg && device->pending_sync_bios.head) {
+ pending_bios = &device->pending_sync_bios;
+ force_reg = 1;
+ } else {
+ pending_bios = &device->pending_bios;
+ force_reg = 0;
+ }
+
+ pending = pending_bios->head;
+ tail = pending_bios->tail;
WARN_ON(pending && !tail);
- device->pending_bios = NULL;
- device->pending_bio_tail = NULL;
/*
* if pending was null this time around, no bios need processing
@@ -176,16 +207,45 @@ loop_lock:
* device->running_pending is used to synchronize with the
* schedule_bio code.
*/
- if (pending) {
- again = 1;
- device->running_pending = 1;
- } else {
+ if (device->pending_sync_bios.head == NULL &&
+ device->pending_bios.head == NULL) {
again = 0;
device->running_pending = 0;
+ } else {
+ again = 1;
+ device->running_pending = 1;
}
+
+ pending_bios->head = NULL;
+ pending_bios->tail = NULL;
+
spin_unlock(&device->io_lock);
+ /*
+ * if we're doing the regular priority list, make sure we unplug
+ * for any high prio bios we've sent down
+ */
+ if (pending_bios == &device->pending_bios && num_sync_run > 0) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
while (pending) {
+
+ rmb();
+ /* we want to work on both lists, but do more bios on the
+ * sync list than the regular list
+ */
+ if ((num_run > 32 &&
+ pending_bios != &device->pending_sync_bios &&
+ device->pending_sync_bios.head) ||
+ (num_run > 64 && pending_bios == &device->pending_sync_bios &&
+ device->pending_bios.head)) {
+ spin_lock(&device->io_lock);
+ requeue_list(pending_bios, pending, tail);
+ goto loop_lock;
+ }
+
cur = pending;
pending = pending->bi_next;
cur->bi_next = NULL;
@@ -196,19 +256,28 @@ loop_lock:
wake_up(&fs_info->async_submit_wait);
BUG_ON(atomic_read(&cur->bi_cnt) == 0);
- bio_get(cur);
submit_bio(cur->bi_rw, cur);
- bio_put(cur);
num_run++;
+ batch_run++;
+
+ if (bio_sync(cur))
+ num_sync_run++;
+
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
/*
* we made progress, there is more work to do and the bdi
* is now congested. Back off and let other work structs
* run instead
*/
- if (pending && bdi_write_congested(bdi) && num_run > 16 &&
+ if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
fs_info->fs_devices->open_devices > 1) {
- struct bio *old_head;
struct io_context *ioc;
ioc = current->io_context;
@@ -233,17 +302,17 @@ loop_lock:
* against it before looping
*/
last_waited = ioc->last_waited;
+ if (need_resched()) {
+ if (num_sync_run) {
+ blk_run_backing_dev(bdi, NULL);
+ num_sync_run = 0;
+ }
+ cond_resched();
+ }
continue;
}
spin_lock(&device->io_lock);
-
- old_head = device->pending_bios;
- device->pending_bios = pending;
- if (device->pending_bio_tail)
- tail->bi_next = old_head;
- else
- device->pending_bio_tail = tail;
-
+ requeue_list(pending_bios, pending, tail);
device->running_pending = 1;
spin_unlock(&device->io_lock);
@@ -251,11 +320,18 @@ loop_lock:
goto done;
}
}
+
+ if (num_sync_run) {
+ num_sync_run = 0;
+ blk_run_backing_dev(bdi, NULL);
+ }
+
+ cond_resched();
if (again)
goto loop;
spin_lock(&device->io_lock);
- if (device->pending_bios)
+ if (device->pending_bios.head || device->pending_sync_bios.head)
goto loop_lock;
spin_unlock(&device->io_lock);
@@ -301,6 +377,7 @@ static noinline int device_list_add(const char *path,
memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
fs_devices->latest_devid = devid;
fs_devices->latest_trans = found_transid;
+ mutex_init(&fs_devices->device_list_mutex);
device = NULL;
} else {
device = __find_device(&fs_devices->devices, devid,
@@ -327,7 +404,11 @@ static noinline int device_list_add(const char *path,
return -ENOMEM;
}
INIT_LIST_HEAD(&device->dev_alloc_list);
+
+ mutex_lock(&fs_devices->device_list_mutex);
list_add(&device->dev_list, &fs_devices->devices);
+ mutex_unlock(&fs_devices->device_list_mutex);
+
device->fs_devices = fs_devices;
fs_devices->num_devices++;
}
@@ -353,10 +434,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
INIT_LIST_HEAD(&fs_devices->devices);
INIT_LIST_HEAD(&fs_devices->alloc_list);
INIT_LIST_HEAD(&fs_devices->list);
+ mutex_init(&fs_devices->device_list_mutex);
fs_devices->latest_devid = orig->latest_devid;
fs_devices->latest_trans = orig->latest_trans;
memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
+ mutex_lock(&orig->device_list_mutex);
list_for_each_entry(orig_dev, &orig->devices, dev_list) {
device = kzalloc(sizeof(*device), GFP_NOFS);
if (!device)
@@ -378,8 +461,10 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
device->fs_devices = fs_devices;
fs_devices->num_devices++;
}
+ mutex_unlock(&orig->device_list_mutex);
return fs_devices;
error:
+ mutex_unlock(&orig->device_list_mutex);
free_fs_devices(fs_devices);
return ERR_PTR(-ENOMEM);
}
@@ -390,6 +475,7 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
mutex_lock(&uuid_mutex);
again:
+ mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
if (device->in_fs_metadata)
continue;
@@ -409,6 +495,7 @@ again:
kfree(device->name);
kfree(device);
}
+ mutex_unlock(&fs_devices->device_list_mutex);
if (fs_devices->seed) {
fs_devices = fs_devices->seed;
@@ -529,6 +616,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
device->in_fs_metadata = 0;
device->mode = flags;
+ if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+ fs_devices->rotating = 1;
+
fs_devices->open_devices++;
if (device->writeable) {
fs_devices->rw_devices++;
@@ -1056,12 +1146,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
device = NULL;
devices = &root->fs_info->fs_devices->devices;
+ mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
list_for_each_entry(tmp, devices, dev_list) {
if (tmp->in_fs_metadata && !tmp->bdev) {
device = tmp;
break;
}
}
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
bdev = NULL;
bh = NULL;
disk_super = NULL;
@@ -1116,7 +1208,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
goto error_brelse;
device->in_fs_metadata = 0;
+
+ /*
+ * the device list mutex makes sure that we don't change
+ * the device list while someone else is writing out all
+ * the device supers.
+ */
+ mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
list_del_init(&device->dev_list);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
device->fs_devices->num_devices--;
next_device = list_entry(root->fs_info->fs_devices->devices.next,
@@ -1210,6 +1311,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
seed_devices->opened = 1;
INIT_LIST_HEAD(&seed_devices->devices);
INIT_LIST_HEAD(&seed_devices->alloc_list);
+ mutex_init(&seed_devices->device_list_mutex);
list_splice_init(&fs_devices->devices, &seed_devices->devices);
list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1335,6 +1437,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
mutex_lock(&root->fs_info->volume_mutex);
devices = &root->fs_info->fs_devices->devices;
+ /*
+ * we have the volume lock, so we don't need the extra
+ * device list mutex while reading the list here.
+ */
list_for_each_entry(device, devices, dev_list) {
if (device->bdev == bdev) {
ret = -EEXIST;
@@ -1375,6 +1481,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->io_align = root->sectorsize;
device->sector_size = root->sectorsize;
device->total_bytes = i_size_read(bdev->bd_inode);
+ device->disk_total_bytes = device->total_bytes;
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
@@ -1388,6 +1495,12 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
}
device->fs_devices = root->fs_info->fs_devices;
+
+ /*
+ * we don't want write_supers to jump in here with our device
+ * half setup
+ */
+ mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
list_add(&device->dev_alloc_list,
&root->fs_info->fs_devices->alloc_list);
@@ -1396,6 +1509,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
root->fs_info->fs_devices->rw_devices++;
root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
+ if (!blk_queue_nonrot(bdev_get_queue(bdev)))
+ root->fs_info->fs_devices->rotating = 1;
+
total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
btrfs_set_super_total_bytes(&root->fs_info->super_copy,
total_bytes + device->total_bytes);
@@ -1403,6 +1519,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
btrfs_set_super_num_devices(&root->fs_info->super_copy,
total_bytes + 1);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
ret = init_first_rw_device(trans, root, device);
@@ -1478,7 +1595,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
btrfs_set_device_io_align(leaf, dev_item, device->io_align);
btrfs_set_device_io_width(leaf, dev_item, device->io_width);
btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
- btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+ btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
btrfs_mark_buffer_dirty(leaf);
@@ -1605,8 +1722,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
int ret;
int i;
- printk(KERN_INFO "btrfs relocating chunk %llu\n",
- (unsigned long long)chunk_offset);
root = root->fs_info->chunk_root;
extent_root = root->fs_info->extent_root;
em_tree = &root->fs_info->mapping_tree.map_tree;
@@ -1875,14 +1990,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
device->total_bytes = new_size;
if (device->writeable)
device->fs_devices->total_rw_bytes -= diff;
- ret = btrfs_update_device(trans, device);
- if (ret) {
- unlock_chunks(root);
- btrfs_end_transaction(trans, root);
- goto done;
- }
- WARN_ON(diff > old_total);
- btrfs_set_super_total_bytes(super_copy, old_total - diff);
unlock_chunks(root);
btrfs_end_transaction(trans, root);
@@ -1914,7 +2021,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
length = btrfs_dev_extent_length(l, dev_extent);
if (key.offset + length <= new_size)
- goto done;
+ break;
chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1927,6 +2034,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
goto done;
}
+ /* Shrinking succeeded, else we would be at "done". */
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ lock_chunks(root);
+
+ device->disk_total_bytes = new_size;
+ /* Now btrfs_update_device() will change the on-disk size. */
+ ret = btrfs_update_device(trans, device);
+ if (ret) {
+ unlock_chunks(root);
+ btrfs_end_transaction(trans, root);
+ goto done;
+ }
+ WARN_ON(diff > old_total);
+ btrfs_set_super_total_bytes(super_copy, old_total - diff);
+ unlock_chunks(root);
+ btrfs_end_transaction(trans, root);
done:
btrfs_free_path(path);
return ret;
@@ -2497,7 +2624,7 @@ again:
max_errors = 1;
}
}
- if (multi_ret && rw == WRITE &&
+ if (multi_ret && (rw & (1 << BIO_RW)) &&
stripes_allocated < stripes_required) {
stripes_allocated = map->num_stripes;
free_extent_map(em);
@@ -2762,6 +2889,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
int rw, struct bio *bio)
{
int should_queue = 1;
+ struct btrfs_pending_bios *pending_bios;
/* don't bother with additional async steps for reads, right now */
if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2911,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
bio->bi_rw |= rw;
spin_lock(&device->io_lock);
+ if (bio_sync(bio))
+ pending_bios = &device->pending_sync_bios;
+ else
+ pending_bios = &device->pending_bios;
- if (device->pending_bio_tail)
- device->pending_bio_tail->bi_next = bio;
+ if (pending_bios->tail)
+ pending_bios->tail->bi_next = bio;
- device->pending_bio_tail = bio;
- if (!device->pending_bios)
- device->pending_bios = bio;
+ pending_bios->tail = bio;
+ if (!pending_bios->head)
+ pending_bios->head = bio;
if (device->running_pending)
should_queue = 0;
@@ -3006,7 +3138,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
unsigned long ptr;
device->devid = btrfs_device_id(leaf, dev_item);
- device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+ device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+ device->total_bytes = device->disk_total_bytes;
device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
device->type = btrfs_device_type(leaf, dev_item);
device->io_align = btrfs_device_io_align(leaf, dev_item);
OpenPOWER on IntegriCloud