diff options
Diffstat (limited to 'fs/nfs/pagelist.c')
-rw-r--r-- | fs/nfs/pagelist.c | 220 |
1 files changed, 206 insertions, 14 deletions
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f343f49..015fb7b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -29,6 +29,8 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; +static void nfs_free_request(struct nfs_page *); + static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { p->npages = pagecount; @@ -136,10 +138,151 @@ nfs_iocounter_wait(struct nfs_io_counter *c) return __nfs_iocounter_wait(c); } +/* + * nfs_page_group_lock - lock the head of the page group + * @req - request in group that is to be locked + * + * this lock must be held if modifying the page group list + */ +void +nfs_page_group_lock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + int err = -EAGAIN; + + WARN_ON_ONCE(head != head->wb_head); + + while (err) + err = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + nfs_wait_bit_killable, TASK_KILLABLE); +} + +/* + * nfs_page_group_unlock - unlock the head of the page group + * @req - request in group that is to be unlocked + */ +void +nfs_page_group_unlock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + smp_mb__before_clear_bit(); + clear_bit(PG_HEADLOCK, &head->wb_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&head->wb_flags, PG_HEADLOCK); +} + +/* + * nfs_page_group_sync_on_bit_locked + * + * must be called with page group lock held + */ +static bool +nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +{ + struct nfs_page *head = req->wb_head; + struct nfs_page *tmp; + + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags)); + WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags)); + + tmp = req->wb_this_page; + while (tmp != req) { + if (!test_bit(bit, &tmp->wb_flags)) + return false; + tmp = tmp->wb_this_page; + } + + /* true! reset all bits */ + tmp = req; + do { + clear_bit(bit, &tmp->wb_flags); + tmp = tmp->wb_this_page; + } while (tmp != req); + + return true; +} + +/* + * nfs_page_group_sync_on_bit - set bit on current request, but only + * return true if the bit is set for all requests in page group + * @req - request in page group + * @bit - PG_* bit that is used to sync page group + */ +bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) +{ + bool ret; + + nfs_page_group_lock(req); + ret = nfs_page_group_sync_on_bit_locked(req, bit); + nfs_page_group_unlock(req); + + return ret; +} + +/* + * nfs_page_group_init - Initialize the page group linkage for @req + * @req - a new nfs request + * @prev - the previous request in page group, or NULL if @req is the first + * or only request in the group (the head). + */ +static inline void +nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) +{ + WARN_ON_ONCE(prev == req); + + if (!prev) { + req->wb_head = req; + req->wb_this_page = req; + } else { + WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); + req->wb_head = prev->wb_head; + req->wb_this_page = prev->wb_this_page; + prev->wb_this_page = req; + + /* grab extra ref if head request has extra ref from + * the write/commit path to handle handoff between write + * and commit lists */ + if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) + kref_get(&req->wb_kref); + } +} + +/* + * nfs_page_group_destroy - sync the destruction of page groups + * @req - request that no longer needs the page group + * + * releases the page group reference from each member once all + * members have called this function. + */ +static void +nfs_page_group_destroy(struct kref *kref) +{ + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + struct nfs_page *tmp, *next; + + if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) + return; + + tmp = req; + do { + next = tmp->wb_this_page; + /* unlink and free */ + tmp->wb_this_page = tmp; + tmp->wb_head = tmp; + nfs_free_request(tmp); + tmp = next; + } while (tmp != req); +} + /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use * @page: page to write + * @last: last nfs request created for this page group or NULL if head * @offset: starting offset within the page for the write * @count: number of bytes to read/write * @@ -149,7 +292,8 @@ nfs_iocounter_wait(struct nfs_io_counter *c) */ struct nfs_page * nfs_create_request(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) + struct nfs_page *last, unsigned int offset, + unsigned int count) { struct nfs_page *req; struct nfs_lock_context *l_ctx; @@ -181,6 +325,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); + nfs_page_group_init(req, last); return req; } @@ -238,16 +383,18 @@ static void nfs_clear_request(struct nfs_page *req) } } - /** * nfs_release_request - Release the count on an NFS read/write request * @req: request to release * * Note: Should never be called with the spinlock held! */ -static void nfs_free_request(struct kref *kref) +static void nfs_free_request(struct nfs_page *req) { - struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + WARN_ON_ONCE(req->wb_this_page != req); + + /* extra debug: make sure no sync bits are still set */ + WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); @@ -256,7 +403,7 @@ static void nfs_free_request(struct kref *kref) void nfs_release_request(struct nfs_page *req) { - kref_put(&req->wb_kref, nfs_free_request); + kref_put(&req->wb_kref, nfs_page_group_destroy); } static int nfs_wait_bit_uninterruptible(void *word) @@ -832,21 +979,66 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * @desc: destination io descriptor * @req: request * + * This may split a request into subrequests which are all part of the + * same page group. + * * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - while (!nfs_pageio_do_add_request(desc, req)) { - desc->pg_moreio = 1; - nfs_pageio_doio(desc); - if (desc->pg_error < 0) - return 0; - desc->pg_moreio = 0; - if (desc->pg_recoalesce) - return 0; - } + struct nfs_page *subreq; + unsigned int bytes_left = 0; + unsigned int offset, pgbase; + + nfs_page_group_lock(req); + + subreq = req; + bytes_left = subreq->wb_bytes; + offset = subreq->wb_offset; + pgbase = subreq->wb_pgbase; + + do { + if (!nfs_pageio_do_add_request(desc, subreq)) { + /* make sure pg_test call(s) did nothing */ + WARN_ON_ONCE(subreq->wb_bytes != bytes_left); + WARN_ON_ONCE(subreq->wb_offset != offset); + WARN_ON_ONCE(subreq->wb_pgbase != pgbase); + + nfs_page_group_unlock(req); + desc->pg_moreio = 1; + nfs_pageio_doio(desc); + if (desc->pg_error < 0) + return 0; + desc->pg_moreio = 0; + if (desc->pg_recoalesce) + return 0; + /* retry add_request for this subreq */ + nfs_page_group_lock(req); + continue; + } + + /* check for buggy pg_test call(s) */ + WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE); + WARN_ON_ONCE(subreq->wb_bytes > bytes_left); + WARN_ON_ONCE(subreq->wb_bytes == 0); + + bytes_left -= subreq->wb_bytes; + offset += subreq->wb_bytes; + pgbase += subreq->wb_bytes; + + if (bytes_left) { + subreq = nfs_create_request(req->wb_context, + req->wb_page, + subreq, pgbase, bytes_left); + nfs_lock_request(subreq); + subreq->wb_offset = offset; + subreq->wb_index = req->wb_index; + } + } while (bytes_left > 0); + + nfs_page_group_unlock(req); return 1; } |