diff options
Diffstat (limited to 'drivers/staging/lustre/lustre/osc/osc_page.c')
-rw-r--r-- | drivers/staging/lustre/lustre/osc/osc_page.c | 186 |
1 files changed, 100 insertions, 86 deletions
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c index 2a7a70a..e356e4af 100644 --- a/drivers/staging/lustre/lustre/osc/osc_page.c +++ b/drivers/staging/lustre/lustre/osc/osc_page.c @@ -37,6 +37,7 @@ #define DEBUG_SUBSYSTEM S_OSC +#include <linux/math64.h> #include "osc_cl_internal.h" static void osc_lru_del(struct client_obd *cli, struct osc_page *opg); @@ -86,11 +87,6 @@ static void osc_page_transfer_add(const struct lu_env *env, struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj); osc_lru_use(osc_cli(obj), opg); - - spin_lock(&obj->oo_seatbelt); - list_add(&opg->ops_inflight, &obj->oo_inflight[crt]); - opg->ops_submitter = current; - spin_unlock(&obj->oo_seatbelt); } int osc_page_cache_add(const struct lu_env *env, @@ -109,7 +105,8 @@ int osc_page_cache_add(const struct lu_env *env, return result; } -void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, +void osc_index2policy(union ldlm_policy_data *policy, + const struct cl_object *obj, pgoff_t start, pgoff_t end) { memset(policy, 0, sizeof(*policy)); @@ -117,25 +114,6 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj, policy->l_extent.end = cl_offset(obj, end + 1) - 1; } -static int osc_page_is_under_lock(const struct lu_env *env, - const struct cl_page_slice *slice, - struct cl_io *unused, pgoff_t *max_index) -{ - struct osc_page *opg = cl2osc_page(slice); - struct ldlm_lock *dlmlock; - int result = -ENODATA; - - dlmlock = osc_dlmlock_at_pgoff(env, cl2osc(slice->cpl_obj), - osc_index(opg), 1, 0); - if (dlmlock) { - *max_index = cl_index(slice->cpl_obj, - dlmlock->l_policy_data.l_extent.end); - LDLM_LOCK_PUT(dlmlock); - result = 0; - } - return result; -} - static const char *osc_list(struct list_head *head) { return list_empty(head) ? "-" : "+"; @@ -158,7 +136,7 @@ static int osc_page_print(const struct lu_env *env, struct osc_object *obj = cl2osc(slice->cpl_obj); struct client_obd *cli = &osc_export(obj)->exp_obd->u.cli; - return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", + return (*printer)(env, cookie, LUSTRE_OSC_NAME "-page@%p %lu: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n", opg, osc_index(opg), /* 1 */ oap->oap_magic, oap->oap_cmd, @@ -170,8 +148,7 @@ static int osc_page_print(const struct lu_env *env, oap->oap_async_flags, oap->oap_brw_flags, oap->oap_request, oap->oap_cli, obj, /* 3 */ - osc_list(&opg->ops_inflight), - opg->ops_submitter, opg->ops_transfer_pinned, + opg->ops_transfer_pinned, osc_submit_duration(opg), opg->ops_srvlock, /* 4 */ cli->cl_r_in_flight, cli->cl_w_in_flight, @@ -210,14 +187,6 @@ static void osc_page_delete(const struct lu_env *env, LASSERT(0); } - spin_lock(&obj->oo_seatbelt); - if (opg->ops_submitter) { - LASSERT(!list_empty(&opg->ops_inflight)); - list_del_init(&opg->ops_inflight); - opg->ops_submitter = NULL; - } - spin_unlock(&obj->oo_seatbelt); - osc_lru_del(osc_cli(obj), opg); if (slice->cpl_page->cp_type == CPT_CACHEABLE) { @@ -276,7 +245,6 @@ static int osc_page_flush(const struct lu_env *env, static const struct cl_page_operations osc_page_ops = { .cpo_print = osc_page_print, .cpo_delete = osc_page_delete, - .cpo_is_under_lock = osc_page_is_under_lock, .cpo_clip = osc_page_clip, .cpo_cancel = osc_page_cancel, .cpo_flush = osc_page_flush @@ -301,10 +269,6 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, cl_page_slice_add(page, &opg->ops_cl, obj, index, &osc_page_ops); } - /* ops_inflight and ops_lru are the same field, but it doesn't - * hurt to initialize it twice :-) - */ - INIT_LIST_HEAD(&opg->ops_inflight); INIT_LIST_HEAD(&opg->ops_lru); /* reserve an LRU space for this page */ @@ -362,16 +326,27 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, * OSC to free slots voluntarily to maintain a reasonable number of free slots * at any time. */ - static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq); -/* LRU pages are freed in batch mode. OSC should at least free this - * number of pages to avoid running out of LRU budget, and.. + +/** + * LRU pages are freed in batch mode. OSC should at least free this + * number of pages to avoid running out of LRU slots. + */ +static inline int lru_shrink_min(struct client_obd *cli) +{ + return cli->cl_max_pages_per_rpc * 2; +} + +/** + * free this number at most otherwise it will take too long time to finish. */ -static const int lru_shrink_min = 2 << (20 - PAGE_SHIFT); /* 2M */ -/* free this number at most otherwise it will take too long time to finish. */ -static const int lru_shrink_max = 8 << (20 - PAGE_SHIFT); /* 8M */ +static inline int lru_shrink_max(struct client_obd *cli) +{ + return cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight; +} -/* Check if we can free LRU slots from this OSC. If there exists LRU waiters, +/** + * Check if we can free LRU slots from this OSC. If there exists LRU waiters, * we should free slots aggressively. In this way, slots are freed in a steady * step to maintain fairness among OSCs. * @@ -388,13 +363,20 @@ static int osc_cache_too_much(struct client_obd *cli) /* if it's going to run out LRU slots, we should free some, but not * too much to maintain fairness among OSCs. */ - if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 4) { + if (atomic_long_read(cli->cl_lru_left) < cache->ccc_lru_max >> 2) { if (pages >= budget) - return lru_shrink_max; + return lru_shrink_max(cli); else if (pages >= budget / 2) - return lru_shrink_min; - } else if (pages >= budget * 2) { - return lru_shrink_min; + return lru_shrink_min(cli); + } else { + time64_t duration = ktime_get_real_seconds(); + + /* knock out pages by duration of no IO activity */ + duration -= cli->cl_lru_last_used; + duration >>= 6; /* approximately 1 minute */ + if (duration > 0 && + pages >= div64_s64((s64)budget, duration)) + return lru_shrink_min(cli); } return 0; } @@ -402,11 +384,21 @@ static int osc_cache_too_much(struct client_obd *cli) int lru_queue_work(const struct lu_env *env, void *data) { struct client_obd *cli = data; + int count; - CDEBUG(D_CACHE, "Run LRU work for client obd %p.\n", cli); + CDEBUG(D_CACHE, "%s: run LRU work for client obd\n", cli_name(cli)); - if (osc_cache_too_much(cli)) - osc_lru_shrink(env, cli, lru_shrink_max, true); + count = osc_cache_too_much(cli); + if (count > 0) { + int rc = osc_lru_shrink(env, cli, count, false); + + CDEBUG(D_CACHE, "%s: shrank %d/%d pages from client obd\n", + cli_name(cli), rc, count); + if (rc >= count) { + CDEBUG(D_CACHE, "%s: queue again\n", cli_name(cli)); + ptlrpcd_queue_work(cli->cl_lru_work); + } + } return 0; } @@ -433,10 +425,10 @@ void osc_lru_add_batch(struct client_obd *cli, struct list_head *plist) list_splice_tail(&lru, &cli->cl_lru_list); atomic_long_sub(npages, &cli->cl_lru_busy); atomic_long_add(npages, &cli->cl_lru_in_list); + cli->cl_lru_last_used = ktime_get_real_seconds(); spin_unlock(&cli->cl_lru_list_lock); - /* XXX: May set force to be true for better performance */ - if (osc_cache_too_much(cli)) + if (waitqueue_active(&osc_lru_waitq)) (void)ptlrpcd_queue_work(cli->cl_lru_work); } } @@ -469,8 +461,10 @@ static void osc_lru_del(struct client_obd *cli, struct osc_page *opg) * this osc occupies too many LRU pages and kernel is * stealing one of them. */ - if (!memory_pressure_get()) + if (osc_cache_too_much(cli)) { + CDEBUG(D_CACHE, "%s: queue LRU work\n", cli_name(cli)); (void)ptlrpcd_queue_work(cli->cl_lru_work); + } wake_up(&osc_lru_waitq); } else { LASSERT(list_empty(&opg->ops_lru)); @@ -502,6 +496,7 @@ static void discard_pagevec(const struct lu_env *env, struct cl_io *io, struct cl_page *page = pvec[i]; LASSERT(cl_page_is_owned(page, io)); + cl_page_delete(env, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); cl_page_put(env, page); @@ -542,7 +537,6 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, struct cl_object *clobj = NULL; struct cl_page **pvec; struct osc_page *opg; - struct osc_page *temp; int maxscan = 0; long count = 0; int index = 0; @@ -552,6 +546,8 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, if (atomic_long_read(&cli->cl_lru_in_list) == 0 || target <= 0) return 0; + CDEBUG(D_CACHE, "%s: shrinkers: %d, force: %d\n", + cli_name(cli), atomic_read(&cli->cl_lru_shrinkers), force); if (!force) { if (atomic_read(&cli->cl_lru_shrinkers) > 0) return -EBUSY; @@ -568,14 +564,21 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, io = &osc_env_info(env)->oti_io; spin_lock(&cli->cl_lru_list_lock); + if (force) + cli->cl_lru_reclaim++; maxscan = min(target << 1, atomic_long_read(&cli->cl_lru_in_list)); - list_for_each_entry_safe(opg, temp, &cli->cl_lru_list, ops_lru) { + while (!list_empty(&cli->cl_lru_list)) { struct cl_page *page; bool will_free = false; + if (!force && atomic_read(&cli->cl_lru_shrinkers) > 1) + break; + if (--maxscan < 0) break; + opg = list_entry(cli->cl_lru_list.next, struct osc_page, + ops_lru); page = opg->ops_cl.cpl_page; if (lru_page_busy(cli, page)) { list_move_tail(&opg->ops_lru, &cli->cl_lru_list); @@ -662,34 +665,43 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli, return count > 0 ? count : rc; } -long osc_lru_reclaim(struct client_obd *cli) +/** + * Reclaim LRU pages by an IO thread. The caller wants to reclaim at least + * \@npages of LRU slots. For performance consideration, it's better to drop + * LRU pages in batch. Therefore, the actual number is adjusted at least + * max_pages_per_rpc. + */ +long osc_lru_reclaim(struct client_obd *cli, unsigned long npages) { - struct cl_env_nest nest; struct lu_env *env; struct cl_client_cache *cache = cli->cl_cache; int max_scans; + int refcheck; long rc = 0; LASSERT(cache); - env = cl_env_nested_get(&nest); + env = cl_env_get(&refcheck); if (IS_ERR(env)) return 0; - rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), false); - if (rc != 0) { - if (rc == -EBUSY) - rc = 0; - - CDEBUG(D_CACHE, "%s: Free %ld pages from own LRU: %p.\n", - cli->cl_import->imp_obd->obd_name, rc, cli); + npages = max_t(int, npages, cli->cl_max_pages_per_rpc); + CDEBUG(D_CACHE, "%s: start to reclaim %ld pages from LRU\n", + cli_name(cli), npages); + rc = osc_lru_shrink(env, cli, npages, true); + if (rc >= npages) { + CDEBUG(D_CACHE, "%s: reclaimed %ld/%ld pages from LRU\n", + cli_name(cli), rc, npages); + if (osc_cache_too_much(cli) > 0) + ptlrpcd_queue_work(cli->cl_lru_work); goto out; + } else if (rc > 0) { + npages -= rc; } - CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld, busy: %ld.\n", - cli->cl_import->imp_obd->obd_name, cli, - atomic_long_read(&cli->cl_lru_in_list), - atomic_long_read(&cli->cl_lru_busy)); + CDEBUG(D_CACHE, "%s: cli %p no free slots, pages: %ld/%ld, want: %ld\n", + cli_name(cli), cli, atomic_long_read(&cli->cl_lru_in_list), + atomic_long_read(&cli->cl_lru_busy), npages); /* Reclaim LRU slots from other client_obd as it can't free enough * from its own. This should rarely happen. @@ -706,7 +718,7 @@ long osc_lru_reclaim(struct client_obd *cli) cl_lru_osc); CDEBUG(D_CACHE, "%s: cli %p LRU pages: %ld, busy: %ld.\n", - cli->cl_import->imp_obd->obd_name, cli, + cli_name(cli), cli, atomic_long_read(&cli->cl_lru_in_list), atomic_long_read(&cli->cl_lru_busy)); @@ -714,19 +726,20 @@ long osc_lru_reclaim(struct client_obd *cli) if (osc_cache_too_much(cli) > 0) { spin_unlock(&cache->ccc_lru_lock); - rc = osc_lru_shrink(env, cli, osc_cache_too_much(cli), - true); + rc = osc_lru_shrink(env, cli, npages, true); spin_lock(&cache->ccc_lru_lock); - if (rc != 0) + if (rc >= npages) break; + if (rc > 0) + npages -= rc; } } spin_unlock(&cache->ccc_lru_lock); out: - cl_env_nested_put(&nest, env); + cl_env_put(env, &refcheck); CDEBUG(D_CACHE, "%s: cli %p freed %ld pages.\n", - cli->cl_import->imp_obd->obd_name, cli, rc); + cli_name(cli), cli, rc); return rc; } @@ -756,7 +769,7 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj, LASSERT(atomic_long_read(cli->cl_lru_left) >= 0); while (!atomic_long_add_unless(cli->cl_lru_left, -1, 0)) { /* run out of LRU spaces, try to drop some by itself */ - rc = osc_lru_reclaim(cli); + rc = osc_lru_reclaim(cli, 1); if (rc < 0) break; if (rc > 0) @@ -796,8 +809,10 @@ static inline void unstable_page_accounting(struct ptlrpc_bulk_desc *desc, int count = 0; int i; + LASSERT(ptlrpc_is_bulk_desc_kiov(desc->bd_type)); + for (i = 0; i < page_count; i++) { - pg_data_t *pgdat = page_pgdat(desc->bd_iov[i].bv_page); + pg_data_t *pgdat = page_pgdat(BD_GET_KIOV(desc, i).bv_page); if (likely(pgdat == last)) { ++count; @@ -857,7 +872,7 @@ void osc_dec_unstable_pages(struct ptlrpc_request *req) if (!unstable_count) wake_up_all(&cli->cl_cache->ccc_unstable_waitq); - if (osc_cache_too_much(cli)) + if (waitqueue_active(&osc_lru_waitq)) (void)ptlrpcd_queue_work(cli->cl_lru_work); } @@ -913,8 +928,7 @@ bool osc_over_unstable_soft_limit(struct client_obd *cli) CDEBUG(D_CACHE, "%s: cli: %p unstable pages: %lu, osc unstable pages: %lu\n", - cli->cl_import->imp_obd->obd_name, cli, - unstable_nr, osc_unstable_count); + cli_name(cli), cli, unstable_nr, osc_unstable_count); /* * If the LRU slots are in shortage - 25% remaining AND this OSC |