diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 135 |
1 files changed, 97 insertions, 38 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8c200e8..df1234c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2258,7 +2258,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) switch (ctype) { case MEM_CGROUP_CHARGE_TYPE_MAPPED: case MEM_CGROUP_CHARGE_TYPE_DROP: - if (page_mapped(page)) + /* See mem_cgroup_prepare_migration() */ + if (page_mapped(page) || PageCgroupMigration(pc)) goto unlock_out; break; case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: @@ -2481,10 +2482,12 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, * Before starting migration, account PAGE_SIZE to mem_cgroup that the old * page belongs to. */ -int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) +int mem_cgroup_prepare_migration(struct page *page, + struct page *newpage, struct mem_cgroup **ptr) { struct page_cgroup *pc; struct mem_cgroup *mem = NULL; + enum charge_type ctype; int ret = 0; if (mem_cgroup_disabled()) @@ -2495,69 +2498,125 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr) if (PageCgroupUsed(pc)) { mem = pc->mem_cgroup; css_get(&mem->css); + /* + * At migrating an anonymous page, its mapcount goes down + * to 0 and uncharge() will be called. But, even if it's fully + * unmapped, migration may fail and this page has to be + * charged again. We set MIGRATION flag here and delay uncharge + * until end_migration() is called + * + * Corner Case Thinking + * A) + * When the old page was mapped as Anon and it's unmap-and-freed + * while migration was ongoing. + * If unmap finds the old page, uncharge() of it will be delayed + * until end_migration(). If unmap finds a new page, it's + * uncharged when it make mapcount to be 1->0. If unmap code + * finds swap_migration_entry, the new page will not be mapped + * and end_migration() will find it(mapcount==0). + * + * B) + * When the old page was mapped but migraion fails, the kernel + * remaps it. A charge for it is kept by MIGRATION flag even + * if mapcount goes down to 0. We can do remap successfully + * without charging it again. + * + * C) + * The "old" page is under lock_page() until the end of + * migration, so, the old page itself will not be swapped-out. + * If the new page is swapped out before end_migraton, our + * hook to usual swap-out path will catch the event. + */ + if (PageAnon(page)) + SetPageCgroupMigration(pc); } unlock_page_cgroup(pc); + /* + * If the page is not charged at this point, + * we return here. + */ + if (!mem) + return 0; *ptr = mem; - if (mem) { - ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false); - css_put(&mem->css); + ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false); + css_put(&mem->css);/* drop extra refcnt */ + if (ret || *ptr == NULL) { + if (PageAnon(page)) { + lock_page_cgroup(pc); + ClearPageCgroupMigration(pc); + unlock_page_cgroup(pc); + /* + * The old page may be fully unmapped while we kept it. + */ + mem_cgroup_uncharge_page(page); + } + return -ENOMEM; } + /* + * We charge new page before it's used/mapped. So, even if unlock_page() + * is called before end_migration, we can catch all events on this new + * page. In the case new page is migrated but not remapped, new page's + * mapcount will be finally 0 and we call uncharge in end_migration(). + */ + pc = lookup_page_cgroup(newpage); + if (PageAnon(page)) + ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; + else if (page_is_file_cache(page)) + ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; + else + ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; + __mem_cgroup_commit_charge(mem, pc, ctype); return ret; } /* remove redundant charge if migration failed*/ void mem_cgroup_end_migration(struct mem_cgroup *mem, - struct page *oldpage, struct page *newpage) + struct page *oldpage, struct page *newpage) { - struct page *target, *unused; + struct page *used, *unused; struct page_cgroup *pc; - enum charge_type ctype; if (!mem) return; + /* blocks rmdir() */ cgroup_exclude_rmdir(&mem->css); /* at migration success, oldpage->mapping is NULL. */ if (oldpage->mapping) { - target = oldpage; - unused = NULL; + used = oldpage; + unused = newpage; } else { - target = newpage; + used = newpage; unused = oldpage; } - - if (PageAnon(target)) - ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; - else if (page_is_file_cache(target)) - ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; - else - ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; - - /* unused page is not on radix-tree now. */ - if (unused) - __mem_cgroup_uncharge_common(unused, ctype); - - pc = lookup_page_cgroup(target); /* - * __mem_cgroup_commit_charge() check PCG_USED bit of page_cgroup. - * So, double-counting is effectively avoided. + * We disallowed uncharge of pages under migration because mapcount + * of the page goes down to zero, temporarly. + * Clear the flag and check the page should be charged. */ - __mem_cgroup_commit_charge(mem, pc, ctype); + pc = lookup_page_cgroup(oldpage); + lock_page_cgroup(pc); + ClearPageCgroupMigration(pc); + unlock_page_cgroup(pc); + if (unused != oldpage) + pc = lookup_page_cgroup(unused); + __mem_cgroup_uncharge_common(unused, MEM_CGROUP_CHARGE_TYPE_FORCE); + + pc = lookup_page_cgroup(used); /* - * Both of oldpage and newpage are still under lock_page(). - * Then, we don't have to care about race in radix-tree. - * But we have to be careful that this page is unmapped or not. - * - * There is a case for !page_mapped(). At the start of - * migration, oldpage was mapped. But now, it's zapped. - * But we know *target* page is not freed/reused under us. - * mem_cgroup_uncharge_page() does all necessary checks. + * If a page is a file cache, radix-tree replacement is very atomic + * and we can skip this check. When it was an Anon page, its mapcount + * goes down to 0. But because we added MIGRATION flage, it's not + * uncharged yet. There are several case but page->mapcount check + * and USED bit check in mem_cgroup_uncharge_page() will do enough + * check. (see prepare_charge() also) */ - if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) - mem_cgroup_uncharge_page(target); + if (PageAnon(used)) + mem_cgroup_uncharge_page(used); /* - * At migration, we may charge account against cgroup which has no tasks + * At migration, we may charge account against cgroup which has no + * tasks. * So, rmdir()->pre_destroy() can be called while we do this charge. * In that case, we need to call pre_destroy() again. check it here. */ |