summaryrefslogtreecommitdiffstats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2013-09-11 14:22:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-11 15:57:48 -0700
commitc8721bbbdd36382de51cd6b7a56322e0acca2414 (patch)
tree8fb7b55974defcde9a4b07f571f0dd2dd1ad591f /mm/hugetlb.c
parent71ea2efb1e936a127690a0a540b3a6162f95e48a (diff)
downloadop-kernel-dev-c8721bbbdd36382de51cd6b7a56322e0acca2414.zip
op-kernel-dev-c8721bbbdd36382de51cd6b7a56322e0acca2414.tar.gz
mm: memory-hotplug: enable memory hotplug to handle hugepage
Until now we can't offline memory blocks which contain hugepages because a hugepage is considered as an unmovable page. But now with this patch series, a hugepage has become movable, so by using hugepage migration we can offline such memory blocks. What's different from other users of hugepage migration is that we need to decompose all the hugepages inside the target memory block into free buddy pages after hugepage migration, because otherwise free hugepages remaining in the memory block intervene the memory offlining. For this reason we introduce new functions dissolve_free_huge_page() and dissolve_free_huge_pages(). Other than that, what this patch does is straightforwardly to add hugepage migration code, that is, adding hugepage code to the functions which scan over pfn and collect hugepages to be migrated, and adding a hugepage allocation function to alloc_migrate_target(). As for larger hugepages (1GB for x86_64), it's not easy to do hotremove over them because it's larger than memory block. So we now simply leave it to fail as it is. [yongjun_wei@trendmicro.com.cn: remove duplicated include] Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Acked-by: Andi Kleen <ak@linux.intel.com> Cc: Hillf Danton <dhillf@gmail.com> Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Hugh Dickins <hughd@google.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Rik van Riel <riel@redhat.com> Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c71
1 files changed, 69 insertions, 2 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d37b3b9..fb4293b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -21,6 +21,7 @@
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/page-isolation.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -522,9 +523,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
{
struct page *page;
- if (list_empty(&h->hugepage_freelists[nid]))
+ list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+ if (!is_migrate_isolate_page(page))
+ break;
+ /*
+ * if 'non-isolated free hugepage' not found on the list,
+ * the allocation fails.
+ */
+ if (&h->hugepage_freelists[nid] == &page->lru)
return NULL;
- page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
list_move(&page->lru, &h->hugepage_activelist);
set_page_refcounted(page);
h->free_huge_pages--;
@@ -878,6 +885,44 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
return ret;
}
+/*
+ * Dissolve a given free hugepage into free buddy pages. This function does
+ * nothing for in-use (including surplus) hugepages.
+ */
+static void dissolve_free_huge_page(struct page *page)
+{
+ spin_lock(&hugetlb_lock);
+ if (PageHuge(page) && !page_count(page)) {
+ struct hstate *h = page_hstate(page);
+ int nid = page_to_nid(page);
+ list_del(&page->lru);
+ h->free_huge_pages--;
+ h->free_huge_pages_node[nid]--;
+ update_and_free_page(h, page);
+ }
+ spin_unlock(&hugetlb_lock);
+}
+
+/*
+ * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
+ * make specified memory blocks removable from the system.
+ * Note that start_pfn should aligned with (minimum) hugepage size.
+ */
+void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+ unsigned int order = 8 * sizeof(void *);
+ unsigned long pfn;
+ struct hstate *h;
+
+ /* Set scan step to minimum hugepage size */
+ for_each_hstate(h)
+ if (order > huge_page_order(h))
+ order = huge_page_order(h);
+ VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order));
+ for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order)
+ dissolve_free_huge_page(pfn_to_page(pfn));
+}
+
static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
{
struct page *page;
@@ -3457,3 +3502,25 @@ void putback_active_hugepage(struct page *page)
spin_unlock(&hugetlb_lock);
put_page(page);
}
+
+bool is_hugepage_active(struct page *page)
+{
+ VM_BUG_ON(!PageHuge(page));
+ /*
+ * This function can be called for a tail page because the caller,
+ * scan_movable_pages, scans through a given pfn-range which typically
+ * covers one memory block. In systems using gigantic hugepage (1GB
+ * for x86_64,) a hugepage is larger than a memory block, and we don't
+ * support migrating such large hugepages for now, so return false
+ * when called for tail pages.
+ */
+ if (PageTail(page))
+ return false;
+ /*
+ * Refcount of a hwpoisoned hugepages is 1, but they are not active,
+ * so we should return false for them.
+ */
+ if (unlikely(PageHWPoison(page)))
+ return false;
+ return page_count(page) > 0;
+}
OpenPOWER on IntegriCloud