diff options
Diffstat (limited to 'drivers/md/dm-cache-policy-mq.c')
-rw-r--r-- | drivers/md/dm-cache-policy-mq.c | 681 |
1 files changed, 396 insertions, 285 deletions
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 4296155..416b7b7 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -26,19 +26,6 @@ static unsigned next_power(unsigned n, unsigned min) /*----------------------------------------------------------------*/ -static unsigned long *alloc_bitset(unsigned nr_entries) -{ - size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG); - return vzalloc(s); -} - -static void free_bitset(unsigned long *bits) -{ - vfree(bits); -} - -/*----------------------------------------------------------------*/ - /* * Large, sequential ios are probably better left on the origin device since * spindles tend to have good bandwidth. @@ -151,6 +138,21 @@ static void queue_init(struct queue *q) } /* + * Checks to see if the queue is empty. + * FIXME: reduce cpu usage. + */ +static bool queue_empty(struct queue *q) +{ + unsigned i; + + for (i = 0; i < NR_QUEUE_LEVELS; i++) + if (!list_empty(q->qs + i)) + return false; + + return true; +} + +/* * Insert an entry to the back of the given level. */ static void queue_push(struct queue *q, unsigned level, struct list_head *elt) @@ -218,17 +220,116 @@ struct entry { struct hlist_node hlist; struct list_head list; dm_oblock_t oblock; - dm_cblock_t cblock; /* valid iff in_cache */ /* * FIXME: pack these better */ - bool in_cache:1; + bool dirty:1; unsigned hit_count; unsigned generation; unsigned tick; }; +/* + * Rather than storing the cblock in an entry, we allocate all entries in + * an array, and infer the cblock from the entry position. + * + * Free entries are linked together into a list. + */ +struct entry_pool { + struct entry *entries, *entries_end; + struct list_head free; + unsigned nr_allocated; +}; + +static int epool_init(struct entry_pool *ep, unsigned nr_entries) +{ + unsigned i; + + ep->entries = vzalloc(sizeof(struct entry) * nr_entries); + if (!ep->entries) + return -ENOMEM; + + ep->entries_end = ep->entries + nr_entries; + + INIT_LIST_HEAD(&ep->free); + for (i = 0; i < nr_entries; i++) + list_add(&ep->entries[i].list, &ep->free); + + ep->nr_allocated = 0; + + return 0; +} + +static void epool_exit(struct entry_pool *ep) +{ + vfree(ep->entries); +} + +static struct entry *alloc_entry(struct entry_pool *ep) +{ + struct entry *e; + + if (list_empty(&ep->free)) + return NULL; + + e = list_entry(list_pop(&ep->free), struct entry, list); + INIT_LIST_HEAD(&e->list); + INIT_HLIST_NODE(&e->hlist); + ep->nr_allocated++; + + return e; +} + +/* + * This assumes the cblock hasn't already been allocated. + */ +static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock) +{ + struct entry *e = ep->entries + from_cblock(cblock); + list_del(&e->list); + + INIT_LIST_HEAD(&e->list); + INIT_HLIST_NODE(&e->hlist); + ep->nr_allocated++; + + return e; +} + +static void free_entry(struct entry_pool *ep, struct entry *e) +{ + BUG_ON(!ep->nr_allocated); + ep->nr_allocated--; + INIT_HLIST_NODE(&e->hlist); + list_add(&e->list, &ep->free); +} + +/* + * Returns NULL if the entry is free. + */ +static struct entry *epool_find(struct entry_pool *ep, dm_cblock_t cblock) +{ + struct entry *e = ep->entries + from_cblock(cblock); + return !hlist_unhashed(&e->hlist) ? e : NULL; +} + +static bool epool_empty(struct entry_pool *ep) +{ + return list_empty(&ep->free); +} + +static bool in_pool(struct entry_pool *ep, struct entry *e) +{ + return e >= ep->entries && e < ep->entries_end; +} + +static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e) +{ + return to_cblock(e - ep->entries); +} + +/*----------------------------------------------------------------*/ + struct mq_policy { struct dm_cache_policy policy; @@ -238,13 +339,22 @@ struct mq_policy { struct io_tracker tracker; /* - * We maintain two queues of entries. The cache proper contains - * the currently active mappings. Whereas the pre_cache tracks - * blocks that are being hit frequently and potential candidates - * for promotion to the cache. + * Entries come from two pools, one of pre-cache entries, and one + * for the cache proper. + */ + struct entry_pool pre_cache_pool; + struct entry_pool cache_pool; + + /* + * We maintain three queues of entries. The cache proper, + * consisting of a clean and dirty queue, contains the currently + * active mappings. Whereas the pre_cache tracks blocks that + * are being hit frequently and potential candidates for promotion + * to the cache. */ struct queue pre_cache; - struct queue cache; + struct queue cache_clean; + struct queue cache_dirty; /* * Keeps track of time, incremented by the core. We use this to @@ -282,25 +392,6 @@ struct mq_policy { unsigned promote_threshold; /* - * We need cache_size entries for the cache, and choose to have - * cache_size entries for the pre_cache too. One motivation for - * using the same size is to make the hit counts directly - * comparable between pre_cache and cache. - */ - unsigned nr_entries; - unsigned nr_entries_allocated; - struct list_head free; - - /* - * Cache blocks may be unallocated. We store this info in a - * bitset. - */ - unsigned long *allocation_bitset; - unsigned nr_cblocks_allocated; - unsigned find_free_nr_words; - unsigned find_free_last_word; - - /* * The hash table allows us to quickly find an entry by origin * block. Both pre_cache and cache entries are in here. */ @@ -310,49 +401,6 @@ struct mq_policy { }; /*----------------------------------------------------------------*/ -/* Free/alloc mq cache entry structures. */ -static void takeout_queue(struct list_head *lh, struct queue *q) -{ - unsigned level; - - for (level = 0; level < NR_QUEUE_LEVELS; level++) - list_splice(q->qs + level, lh); -} - -static void free_entries(struct mq_policy *mq) -{ - struct entry *e, *tmp; - - takeout_queue(&mq->free, &mq->pre_cache); - takeout_queue(&mq->free, &mq->cache); - - list_for_each_entry_safe(e, tmp, &mq->free, list) - kmem_cache_free(mq_entry_cache, e); -} - -static int alloc_entries(struct mq_policy *mq, unsigned elts) -{ - unsigned u = mq->nr_entries; - - INIT_LIST_HEAD(&mq->free); - mq->nr_entries_allocated = 0; - - while (u--) { - struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL); - - if (!e) { - free_entries(mq); - return -ENOMEM; - } - - - list_add(&e->list, &mq->free); - } - - return 0; -} - -/*----------------------------------------------------------------*/ /* * Simple hash table implementation. Should replace with the standard hash @@ -388,96 +436,14 @@ static void hash_remove(struct entry *e) /*----------------------------------------------------------------*/ -/* - * Allocates a new entry structure. The memory is allocated in one lump, - * so we just handing it out here. Returns NULL if all entries have - * already been allocated. Cannot fail otherwise. - */ -static struct entry *alloc_entry(struct mq_policy *mq) -{ - struct entry *e; - - if (mq->nr_entries_allocated >= mq->nr_entries) { - BUG_ON(!list_empty(&mq->free)); - return NULL; - } - - e = list_entry(list_pop(&mq->free), struct entry, list); - INIT_LIST_HEAD(&e->list); - INIT_HLIST_NODE(&e->hlist); - - mq->nr_entries_allocated++; - return e; -} - -/*----------------------------------------------------------------*/ - -/* - * Mark cache blocks allocated or not in the bitset. - */ -static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock) -{ - BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); - BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset)); - - set_bit(from_cblock(cblock), mq->allocation_bitset); - mq->nr_cblocks_allocated++; -} - -static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock) -{ - BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); - BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset)); - - clear_bit(from_cblock(cblock), mq->allocation_bitset); - mq->nr_cblocks_allocated--; -} - static bool any_free_cblocks(struct mq_policy *mq) { - return mq->nr_cblocks_allocated < from_cblock(mq->cache_size); + return !epool_empty(&mq->cache_pool); } -/* - * Fills result out with a cache block that isn't in use, or return - * -ENOSPC. This does _not_ mark the cblock as allocated, the caller is - * reponsible for that. - */ -static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end, - dm_cblock_t *result, unsigned *last_word) +static bool any_clean_cblocks(struct mq_policy *mq) { - int r = -ENOSPC; - unsigned w; - - for (w = begin; w < end; w++) { - /* - * ffz is undefined if no zero exists - */ - if (mq->allocation_bitset[w] != ~0UL) { - *last_word = w; - *result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w])); - if (from_cblock(*result) < from_cblock(mq->cache_size)) - r = 0; - - break; - } - } - - return r; -} - -static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result) -{ - int r; - - if (!any_free_cblocks(mq)) - return -ENOSPC; - - r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word); - if (r == -ENOSPC && mq->find_free_last_word) - r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word); - - return r; + return !queue_empty(&mq->cache_clean); } /*----------------------------------------------------------------*/ @@ -496,33 +462,35 @@ static unsigned queue_level(struct entry *e) return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u); } +static bool in_cache(struct mq_policy *mq, struct entry *e) +{ + return in_pool(&mq->cache_pool, e); +} + /* * Inserts the entry into the pre_cache or the cache. Ensures the cache - * block is marked as allocated if necc. Inserts into the hash table. Sets the - * tick which records when the entry was last moved about. + * block is marked as allocated if necc. Inserts into the hash table. + * Sets the tick which records when the entry was last moved about. */ static void push(struct mq_policy *mq, struct entry *e) { e->tick = mq->tick; hash_insert(mq, e); - if (e->in_cache) { - alloc_cblock(mq, e->cblock); - queue_push(&mq->cache, queue_level(e), &e->list); - } else + if (in_cache(mq, e)) + queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean, + queue_level(e), &e->list); + else queue_push(&mq->pre_cache, queue_level(e), &e->list); } /* * Removes an entry from pre_cache or cache. Removes from the hash table. - * Frees off the cache block if necc. */ static void del(struct mq_policy *mq, struct entry *e) { queue_remove(&e->list); hash_remove(e); - if (e->in_cache) - free_cblock(mq, e->cblock); } /* @@ -531,14 +499,14 @@ static void del(struct mq_policy *mq, struct entry *e) */ static struct entry *pop(struct mq_policy *mq, struct queue *q) { - struct entry *e = container_of(queue_pop(q), struct entry, list); + struct entry *e; + struct list_head *h = queue_pop(q); - if (e) { - hash_remove(e); + if (!h) + return NULL; - if (e->in_cache) - free_cblock(mq, e->cblock); - } + e = container_of(h, struct entry, list); + hash_remove(e); return e; } @@ -556,7 +524,8 @@ static bool updated_this_tick(struct mq_policy *mq, struct entry *e) * of the entries. * * At the moment the threshold is taken by averaging the hit counts of some - * of the entries in the cache (the first 20 entries of the first level). + * of the entries in the cache (the first 20 entries across all levels in + * ascending order, giving preference to the clean entries at each level). * * We can be much cleverer than this though. For example, each promotion * could bump up the threshold helping to prevent churn. Much more to do @@ -571,14 +540,21 @@ static void check_generation(struct mq_policy *mq) struct list_head *head; struct entry *e; - if ((mq->hit_count >= mq->generation_period) && - (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) { - + if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) { mq->hit_count = 0; mq->generation++; for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) { - head = mq->cache.qs + level; + head = mq->cache_clean.qs + level; + list_for_each_entry(e, head, list) { + nr++; + total += e->hit_count; + + if (++count >= MAX_TO_AVERAGE) + break; + } + + head = mq->cache_dirty.qs + level; list_for_each_entry(e, head, list) { nr++; total += e->hit_count; @@ -631,19 +607,30 @@ static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e) * - set the hit count to a hard coded value other than 1, eg, is it better * if it goes in at level 2? */ -static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) +static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) { - dm_cblock_t result; - struct entry *demoted = pop(mq, &mq->cache); + struct entry *demoted = pop(mq, &mq->cache_clean); + + if (!demoted) + /* + * We could get a block from mq->cache_dirty, but that + * would add extra latency to the triggering bio as it + * waits for the writeback. Better to not promote this + * time and hope there's a clean block next time this block + * is hit. + */ + return -ENOSPC; - BUG_ON(!demoted); - result = demoted->cblock; *oblock = demoted->oblock; - demoted->in_cache = false; - demoted->hit_count = 1; - push(mq, demoted); + free_entry(&mq->cache_pool, demoted); + + /* + * We used to put the demoted block into the pre-cache, but I think + * it's simpler to just let it work it's way up from zero again. + * Stops blocks flickering in and out of the cache. + */ - return result; + return 0; } /* @@ -662,17 +649,18 @@ static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) static unsigned adjusted_promote_threshold(struct mq_policy *mq, bool discarded_oblock, int data_dir) { - if (discarded_oblock && any_free_cblocks(mq) && data_dir == WRITE) + if (data_dir == READ) + return mq->promote_threshold + READ_PROMOTE_THRESHOLD; + + if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) { /* * We don't need to do any copying at all, so give this a - * very low threshold. In practice this only triggers - * during initial population after a format. + * very low threshold. */ return DISCARDED_PROMOTE_THRESHOLD; + } - return data_dir == READ ? - (mq->promote_threshold + READ_PROMOTE_THRESHOLD) : - (mq->promote_threshold + WRITE_PROMOTE_THRESHOLD); + return mq->promote_threshold + WRITE_PROMOTE_THRESHOLD; } static bool should_promote(struct mq_policy *mq, struct entry *e, @@ -688,34 +676,49 @@ static int cache_entry_found(struct mq_policy *mq, { requeue_and_update_tick(mq, e); - if (e->in_cache) { + if (in_cache(mq, e)) { result->op = POLICY_HIT; - result->cblock = e->cblock; + result->cblock = infer_cblock(&mq->cache_pool, e); } return 0; } /* - * Moves and entry from the pre_cache to the cache. The main work is + * Moves an entry from the pre_cache to the cache. The main work is * finding which cache block to use. */ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, struct policy_result *result) { - dm_cblock_t cblock; + int r; + struct entry *new_e; - if (find_free_cblock(mq, &cblock) == -ENOSPC) { + /* Ensure there's a free cblock in the cache */ + if (epool_empty(&mq->cache_pool)) { result->op = POLICY_REPLACE; - cblock = demote_cblock(mq, &result->old_oblock); + r = demote_cblock(mq, &result->old_oblock); + if (r) { + result->op = POLICY_MISS; + return 0; + } } else result->op = POLICY_NEW; - result->cblock = e->cblock = cblock; + new_e = alloc_entry(&mq->cache_pool); + BUG_ON(!new_e); + + new_e->oblock = e->oblock; + new_e->dirty = false; + new_e->hit_count = e->hit_count; + new_e->generation = e->generation; + new_e->tick = e->tick; del(mq, e); - e->in_cache = true; - push(mq, e); + free_entry(&mq->pre_cache_pool, e); + push(mq, new_e); + + result->cblock = infer_cblock(&mq->cache_pool, new_e); return 0; } @@ -743,7 +746,7 @@ static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, static void insert_in_pre_cache(struct mq_policy *mq, dm_oblock_t oblock) { - struct entry *e = alloc_entry(mq); + struct entry *e = alloc_entry(&mq->pre_cache_pool); if (!e) /* @@ -757,7 +760,7 @@ static void insert_in_pre_cache(struct mq_policy *mq, return; } - e->in_cache = false; + e->dirty = false; e->oblock = oblock; e->hit_count = 1; e->generation = mq->generation; @@ -767,30 +770,36 @@ static void insert_in_pre_cache(struct mq_policy *mq, static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, struct policy_result *result) { + int r; struct entry *e; - dm_cblock_t cblock; - if (find_free_cblock(mq, &cblock) == -ENOSPC) { - result->op = POLICY_MISS; - insert_in_pre_cache(mq, oblock); - return; - } + if (epool_empty(&mq->cache_pool)) { + result->op = POLICY_REPLACE; + r = demote_cblock(mq, &result->old_oblock); + if (unlikely(r)) { + result->op = POLICY_MISS; + insert_in_pre_cache(mq, oblock); + return; + } - e = alloc_entry(mq); - if (unlikely(!e)) { - result->op = POLICY_MISS; - return; + /* + * This will always succeed, since we've just demoted. + */ + e = alloc_entry(&mq->cache_pool); + BUG_ON(!e); + + } else { + e = alloc_entry(&mq->cache_pool); + result->op = POLICY_NEW; } e->oblock = oblock; - e->cblock = cblock; - e->in_cache = true; + e->dirty = false; e->hit_count = 1; e->generation = mq->generation; push(mq, e); - result->op = POLICY_NEW; - result->cblock = e->cblock; + result->cblock = infer_cblock(&mq->cache_pool, e); } static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, @@ -821,13 +830,16 @@ static int map(struct mq_policy *mq, dm_oblock_t oblock, int r = 0; struct entry *e = hash_lookup(mq, oblock); - if (e && e->in_cache) + if (e && in_cache(mq, e)) r = cache_entry_found(mq, e, result); + else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) result->op = POLICY_MISS; + else if (e) r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, data_dir, result); + else r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, data_dir, result); @@ -854,9 +866,9 @@ static void mq_destroy(struct dm_cache_policy *p) { struct mq_policy *mq = to_mq_policy(p); - free_bitset(mq->allocation_bitset); kfree(mq->table); - free_entries(mq); + epool_exit(&mq->cache_pool); + epool_exit(&mq->pre_cache_pool); kfree(mq); } @@ -904,8 +916,8 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t return -EWOULDBLOCK; e = hash_lookup(mq, oblock); - if (e && e->in_cache) { - *cblock = e->cblock; + if (e && in_cache(mq, e)) { + *cblock = infer_cblock(&mq->cache_pool, e); r = 0; } else r = -ENOENT; @@ -915,6 +927,36 @@ static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t return r; } +static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set) +{ + struct entry *e; + + e = hash_lookup(mq, oblock); + BUG_ON(!e || !in_cache(mq, e)); + + del(mq, e); + e->dirty = set; + push(mq, e); +} + +static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + __mq_set_clear_dirty(mq, oblock, true); + mutex_unlock(&mq->lock); +} + +static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + __mq_set_clear_dirty(mq, oblock, false); + mutex_unlock(&mq->lock); +} + static int mq_load_mapping(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t cblock, uint32_t hint, bool hint_valid) @@ -922,13 +964,9 @@ static int mq_load_mapping(struct dm_cache_policy *p, struct mq_policy *mq = to_mq_policy(p); struct entry *e; - e = alloc_entry(mq); - if (!e) - return -ENOMEM; - - e->cblock = cblock; + e = alloc_particular_entry(&mq->cache_pool, cblock); e->oblock = oblock; - e->in_cache = true; + e->dirty = false; /* this gets corrected in a minute */ e->hit_count = hint_valid ? hint : 1; e->generation = mq->generation; push(mq, e); @@ -936,57 +974,126 @@ static int mq_load_mapping(struct dm_cache_policy *p, return 0; } +static int mq_save_hints(struct mq_policy *mq, struct queue *q, + policy_walk_fn fn, void *context) +{ + int r; + unsigned level; + struct entry *e; + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + list_for_each_entry(e, q->qs + level, list) { + r = fn(context, infer_cblock(&mq->cache_pool, e), + e->oblock, e->hit_count); + if (r) + return r; + } + + return 0; +} + static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, void *context) { struct mq_policy *mq = to_mq_policy(p); int r = 0; - struct entry *e; - unsigned level; mutex_lock(&mq->lock); - for (level = 0; level < NR_QUEUE_LEVELS; level++) - list_for_each_entry(e, &mq->cache.qs[level], list) { - r = fn(context, e->cblock, e->oblock, e->hit_count); - if (r) - goto out; - } + r = mq_save_hints(mq, &mq->cache_clean, fn, context); + if (!r) + r = mq_save_hints(mq, &mq->cache_dirty, fn, context); -out: mutex_unlock(&mq->lock); return r; } +static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock) +{ + struct entry *e; + + e = hash_lookup(mq, oblock); + BUG_ON(!e || !in_cache(mq, e)); + + del(mq, e); + free_entry(&mq->cache_pool, e); +} + static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) { struct mq_policy *mq = to_mq_policy(p); - struct entry *e; mutex_lock(&mq->lock); + __remove_mapping(mq, oblock); + mutex_unlock(&mq->lock); +} - e = hash_lookup(mq, oblock); +static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock) +{ + struct entry *e = epool_find(&mq->cache_pool, cblock); - BUG_ON(!e || !e->in_cache); + if (!e) + return -ENODATA; del(mq, e); - e->in_cache = false; - push(mq, e); + free_entry(&mq->cache_pool, e); + return 0; +} + +static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock) +{ + int r; + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + r = __remove_cblock(mq, cblock); mutex_unlock(&mq->lock); + + return r; } -static void force_mapping(struct mq_policy *mq, - dm_oblock_t current_oblock, dm_oblock_t new_oblock) +static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock, + dm_cblock_t *cblock) { - struct entry *e = hash_lookup(mq, current_oblock); + struct entry *e = pop(mq, &mq->cache_dirty); - BUG_ON(!e || !e->in_cache); + if (!e) + return -ENODATA; - del(mq, e); - e->oblock = new_oblock; + *oblock = e->oblock; + *cblock = infer_cblock(&mq->cache_pool, e); + e->dirty = false; push(mq, e); + + return 0; +} + +static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + int r; + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + r = __mq_writeback_work(mq, oblock, cblock); + mutex_unlock(&mq->lock); + + return r; +} + +static void __force_mapping(struct mq_policy *mq, + dm_oblock_t current_oblock, dm_oblock_t new_oblock) +{ + struct entry *e = hash_lookup(mq, current_oblock); + + if (e && in_cache(mq, e)) { + del(mq, e); + e->oblock = new_oblock; + e->dirty = true; + push(mq, e); + } } static void mq_force_mapping(struct dm_cache_policy *p, @@ -995,16 +1102,20 @@ static void mq_force_mapping(struct dm_cache_policy *p, struct mq_policy *mq = to_mq_policy(p); mutex_lock(&mq->lock); - force_mapping(mq, current_oblock, new_oblock); + __force_mapping(mq, current_oblock, new_oblock); mutex_unlock(&mq->lock); } static dm_cblock_t mq_residency(struct dm_cache_policy *p) { + dm_cblock_t r; struct mq_policy *mq = to_mq_policy(p); - /* FIXME: lock mutex, not sure we can block here */ - return to_cblock(mq->nr_cblocks_allocated); + mutex_lock(&mq->lock); + r = to_cblock(mq->cache_pool.nr_allocated); + mutex_unlock(&mq->lock); + + return r; } static void mq_tick(struct dm_cache_policy *p) @@ -1057,10 +1168,13 @@ static void init_policy_functions(struct mq_policy *mq) mq->policy.destroy = mq_destroy; mq->policy.map = mq_map; mq->policy.lookup = mq_lookup; + mq->policy.set_dirty = mq_set_dirty; + mq->policy.clear_dirty = mq_clear_dirty; mq->policy.load_mapping = mq_load_mapping; mq->policy.walk_mappings = mq_walk_mappings; mq->policy.remove_mapping = mq_remove_mapping; - mq->policy.writeback_work = NULL; + mq->policy.remove_cblock = mq_remove_cblock; + mq->policy.writeback_work = mq_writeback_work; mq->policy.force_mapping = mq_force_mapping; mq->policy.residency = mq_residency; mq->policy.tick = mq_tick; @@ -1072,7 +1186,6 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size) { - int r; struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL); if (!mq) @@ -1080,8 +1193,18 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, init_policy_functions(mq); iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT); - mq->cache_size = cache_size; + + if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) { + DMERR("couldn't initialize pool of pre-cache entries"); + goto bad_pre_cache_init; + } + + if (epool_init(&mq->cache_pool, from_cblock(cache_size))) { + DMERR("couldn't initialize pool of cache entries"); + goto bad_cache_init; + } + mq->tick_protected = 0; mq->tick = 0; mq->hit_count = 0; @@ -1089,20 +1212,12 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, mq->promote_threshold = 0; mutex_init(&mq->lock); spin_lock_init(&mq->tick_lock); - mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG); - mq->find_free_last_word = 0; queue_init(&mq->pre_cache); - queue_init(&mq->cache); - mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); + queue_init(&mq->cache_clean); + queue_init(&mq->cache_dirty); - mq->nr_entries = 2 * from_cblock(cache_size); - r = alloc_entries(mq, mq->nr_entries); - if (r) - goto bad_cache_alloc; - - mq->nr_entries_allocated = 0; - mq->nr_cblocks_allocated = 0; + mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); mq->hash_bits = ffs(mq->nr_buckets) - 1; @@ -1110,17 +1225,13 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, if (!mq->table) goto bad_alloc_table; - mq->allocation_bitset = alloc_bitset(from_cblock(cache_size)); - if (!mq->allocation_bitset) - goto bad_alloc_bitset; - return &mq->policy; -bad_alloc_bitset: - kfree(mq->table); bad_alloc_table: - free_entries(mq); -bad_cache_alloc: + epool_exit(&mq->cache_pool); +bad_cache_init: + epool_exit(&mq->pre_cache_pool); +bad_pre_cache_init: kfree(mq); return NULL; @@ -1130,7 +1241,7 @@ bad_cache_alloc: static struct dm_cache_policy_type mq_policy_type = { .name = "mq", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create @@ -1138,7 +1249,7 @@ static struct dm_cache_policy_type mq_policy_type = { static struct dm_cache_policy_type default_policy_type = { .name = "default", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create |