From 5e928f77a09a07f9dd595bb8a489965d69a83458 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Aug 2009 23:38:32 +0200 Subject: PM: Introduce core framework for run-time PM of I/O devices (rev. 17) Introduce a core framework for run-time power management of I/O devices. Add device run-time PM fields to 'struct dev_pm_info' and device run-time PM callbacks to 'struct dev_pm_ops'. Introduce a run-time PM workqueue and define some device run-time PM helper functions at the core level. Document all these things. Special thanks to Alan Stern for his help with the design and multiple detailed reviews of the pereceding versions of this patch and to Magnus Damm for testing feedback. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- kernel/power/Kconfig | 14 ++++++++++++++ kernel/power/main.c | 17 +++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 72067cb..91e09d3 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -208,3 +208,17 @@ config APM_EMULATION random kernel OOPSes or reboots that don't seem to be related to anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). + +config PM_RUNTIME + bool "Run-time PM core functionality" + depends on PM + ---help--- + Enable functionality allowing I/O devices to be put into energy-saving + (low power) states at run time (or autosuspended) after a specified + period of inactivity and woken up in response to a hardware-generated + wake-up event or a driver's request. + + Hardware support is generally required for this functionality to work + and the bus type drivers of the buses the devices are on are + responsible for the actual handling of the autosuspend requests and + wake-up events. diff --git a/kernel/power/main.c b/kernel/power/main.c index f710e36..347d2cc 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "power.h" @@ -217,8 +218,24 @@ static struct attribute_group attr_group = { .attrs = g, }; +#ifdef CONFIG_PM_RUNTIME +struct workqueue_struct *pm_wq; + +static int __init pm_start_workqueue(void) +{ + pm_wq = create_freezeable_workqueue("pm"); + + return pm_wq ? 0 : -ENOMEM; +} +#else +static inline int pm_start_workqueue(void) { return 0; } +#endif + static int __init pm_init(void) { + int error = pm_start_workqueue(); + if (error) + return error; power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; -- cgit v1.1 From e681c9dd62fe8fcc5bba28a3ca3f7dc8be940206 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 8 Jul 2009 13:23:32 +0200 Subject: PM: Fix typo in label name s/Platofrm_finish/Platform_finish/ Although the same label name is used somewhere else in the file, this particular label was consistently typoed in all of its uses. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 81d2e74..ec82025 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -460,11 +460,11 @@ int hibernation_platform_enter(void) error = hibernation_ops->prepare(); if (error) - goto Platofrm_finish; + goto Platform_finish; error = disable_nonboot_cpus(); if (error) - goto Platofrm_finish; + goto Platform_finish; local_irq_disable(); sysdev_suspend(PMSG_HIBERNATE); @@ -476,7 +476,7 @@ int hibernation_platform_enter(void) * We don't need to reenable the nonboot CPUs or resume consoles, since * the system is going to be halted anyway. */ - Platofrm_finish: + Platform_finish: hibernation_ops->finish(); dpm_suspend_noirq(PMSG_RESTORE); -- cgit v1.1 From 4bb334353ebd821bc8eeabeb019eaac33c7307df Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 8 Jul 2009 13:23:51 +0200 Subject: PM/Hibernate: Rework shrinking of memory Rework swsusp_shrink_memory() so that it calls shrink_all_memory() just once to make some room for the image and then allocates memory to apply more pressure to the memory management subsystem, if necessary. Unfortunately, we don't seem to be able to drop shrink_all_memory() entirely just yet, because that would lead to huge performance regressions in some test cases. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek --- kernel/power/snapshot.c | 205 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 158 insertions(+), 47 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 523a451..a3a175f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1066,69 +1066,180 @@ void swsusp_free(void) buffer = NULL; } +/* Helper functions used for the shrinking of memory. */ + +#define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) + /** - * swsusp_shrink_memory - Try to free as much memory as needed - * - * ... but do not OOM-kill anyone + * preallocate_image_pages - Allocate a number of pages for hibernation image + * @nr_pages: Number of page frames to allocate. + * @mask: GFP flags to use for the allocation. * - * Notice: all userland should be stopped before it is called, or - * livelock is possible. + * Return value: Number of page frames actually allocated */ +static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) +{ + unsigned long nr_alloc = 0; + + while (nr_pages > 0) { + if (!alloc_image_page(mask)) + break; + nr_pages--; + nr_alloc++; + } -#define SHRINK_BITE 10000 -static inline unsigned long __shrink_memory(long tmp) + return nr_alloc; +} + +static unsigned long preallocate_image_memory(unsigned long nr_pages) +{ + return preallocate_image_pages(nr_pages, GFP_IMAGE); +} + +#ifdef CONFIG_HIGHMEM +static unsigned long preallocate_image_highmem(unsigned long nr_pages) { - if (tmp > SHRINK_BITE) - tmp = SHRINK_BITE; - return shrink_all_memory(tmp); + return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM); } +/** + * __fraction - Compute (an approximation of) x * (multiplier / base) + */ +static unsigned long __fraction(u64 x, u64 multiplier, u64 base) +{ + x *= multiplier; + do_div(x, base); + return (unsigned long)x; +} + +static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, + unsigned long highmem, + unsigned long total) +{ + unsigned long alloc = __fraction(nr_pages, highmem, total); + + return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM); +} +#else /* CONFIG_HIGHMEM */ +static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) +{ + return 0; +} + +static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, + unsigned long highmem, + unsigned long total) +{ + return 0; +} +#endif /* CONFIG_HIGHMEM */ + +/** + * swsusp_shrink_memory - Make the kernel release as much memory as needed + * + * To create a hibernation image it is necessary to make a copy of every page + * frame in use. We also need a number of page frames to be free during + * hibernation for allocations made while saving the image and for device + * drivers, in case they need to allocate memory from their hibernation + * callbacks (these two numbers are given by PAGES_FOR_IO and SPARE_PAGES, + * respectively, both of which are rough estimates). To make this happen, we + * compute the total number of available page frames and allocate at least + * + * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2 + 2 * SPARE_PAGES + * + * of them, which corresponds to the maximum size of a hibernation image. + * + * If image_size is set below the number following from the above formula, + * the preallocation of memory is continued until the total number of saveable + * pages in the system is below the requested image size or it is impossible to + * allocate more memory, whichever happens first. + */ int swsusp_shrink_memory(void) { - long tmp; struct zone *zone; - unsigned long pages = 0; - unsigned int i = 0; - char *p = "-\\|/"; + unsigned long saveable, size, max_size, count, highmem, pages = 0; + unsigned long alloc, pages_highmem; struct timeval start, stop; + int error = 0; - printk(KERN_INFO "PM: Shrinking memory... "); + printk(KERN_INFO "PM: Shrinking memory... "); do_gettimeofday(&start); - do { - long size, highmem_size; - - highmem_size = count_highmem_pages(); - size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; - tmp = size; - size += highmem_size; - for_each_populated_zone(zone) { - tmp += snapshot_additional_pages(zone); - if (is_highmem(zone)) { - highmem_size -= - zone_page_state(zone, NR_FREE_PAGES); - } else { - tmp -= zone_page_state(zone, NR_FREE_PAGES); - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - } - } - if (highmem_size < 0) - highmem_size = 0; + /* Count the number of saveable data pages. */ + highmem = count_highmem_pages(); + saveable = count_data_pages(); - tmp += highmem_size; - if (tmp > 0) { - tmp = __shrink_memory(tmp); - if (!tmp) - return -ENOMEM; - pages += tmp; - } else if (size > image_size / PAGE_SIZE) { - tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); - pages += tmp; - } - printk("\b%c", p[i++%4]); - } while (tmp > 0); + /* + * Compute the total number of page frames we can use (count) and the + * number of pages needed for image metadata (size). + */ + count = saveable; + saveable += highmem; + size = 0; + for_each_populated_zone(zone) { + size += snapshot_additional_pages(zone); + if (is_highmem(zone)) + highmem += zone_page_state(zone, NR_FREE_PAGES); + else + count += zone_page_state(zone, NR_FREE_PAGES); + } + count += highmem; + count -= totalreserve_pages; + + /* Compute the maximum number of saveable pages to leave in memory. */ + max_size = (count - (size + PAGES_FOR_IO)) / 2 - 2 * SPARE_PAGES; + size = DIV_ROUND_UP(image_size, PAGE_SIZE); + if (size > max_size) + size = max_size; + /* + * If the maximum is not less than the current number of saveable pages + * in memory, we don't need to do anything more. + */ + if (size >= saveable) + goto out; + + /* + * Let the memory management subsystem know that we're going to need a + * large number of page frames to allocate and make it free some memory. + * NOTE: If this is not done, performance will be hurt badly in some + * test cases. + */ + shrink_all_memory(saveable - size); + + /* + * The number of saveable pages in memory was too high, so apply some + * pressure to decrease it. First, make room for the largest possible + * image and fail if that doesn't work. Next, try to decrease the size + * of the image as much as indicated by image_size using allocations + * from highmem and non-highmem zones separately. + */ + pages_highmem = preallocate_image_highmem(highmem / 2); + alloc = (count - max_size) - pages_highmem; + pages = preallocate_image_memory(alloc); + if (pages < alloc) { + error = -ENOMEM; + goto free_out; + } + size = max_size - size; + alloc = size; + size = preallocate_highmem_fraction(size, highmem, count); + pages_highmem += size; + alloc -= size; + pages += preallocate_image_memory(alloc); + pages += pages_highmem; + + free_out: + /* Release all of the preallocated page frames. */ + swsusp_free(); + + if (error) { + printk(KERN_CONT "\n"); + return error; + } + + out: do_gettimeofday(&stop); - printk("\bdone (%lu pages freed)\n", pages); + printk(KERN_CONT "done (preallocated %lu free pages)\n", pages); swsusp_show_speed(&start, &stop, pages, "Freed"); return 0; -- cgit v1.1 From 64a473cb74a88cb4991edf985d55a266e65292e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 8 Jul 2009 13:24:05 +0200 Subject: PM/Hibernate: Do not release preallocated memory unnecessarily (rev. 2) Since the hibernation code is now going to use allocations of memory to make enough room for the image, it can also use the page frames allocated at this stage as image page frames. The low-level hibernation code needs to be rearranged for this purpose, but it allows us to avoid freeing a great number of pages and allocating these same pages once again later, so it generally is worth doing. [rev. 2: Take highmem into account correctly.] Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 15 +++- kernel/power/power.h | 2 +- kernel/power/snapshot.c | 202 +++++++++++++++++++++++++++++++---------------- 3 files changed, 147 insertions(+), 72 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index ec82025..04b3a83 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -298,8 +298,8 @@ int hibernation_snapshot(int platform_mode) if (error) return error; - /* Free memory before shutting down devices. */ - error = swsusp_shrink_memory(); + /* Preallocate image memory before shutting down devices. */ + error = hibernate_preallocate_memory(); if (error) goto Close; @@ -315,6 +315,10 @@ int hibernation_snapshot(int platform_mode) /* Control returns here after successful restore */ Resume_devices: + /* We may need to release the preallocated image pages here. */ + if (error || !in_suspend) + swsusp_free(); + dpm_resume_end(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); resume_console(); @@ -578,7 +582,10 @@ int hibernate(void) goto Thaw; error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); - if (in_suspend && !error) { + if (error) + goto Thaw; + + if (in_suspend) { unsigned int flags = 0; if (hibernation_mode == HIBERNATION_PLATFORM) @@ -590,8 +597,8 @@ int hibernate(void) power_down(); } else { pr_debug("PM: Image restored successfully.\n"); - swsusp_free(); } + Thaw: thaw_processes(); Finish: diff --git a/kernel/power/power.h b/kernel/power/power.h index 26d5a26..46c5a26 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void); extern int create_basic_memory_bitmaps(void); extern void free_basic_memory_bitmaps(void); -extern int swsusp_shrink_memory(void); +extern int hibernate_preallocate_memory(void); /** * Auxiliary structure used for reading the snapshot image data and diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index a3a175f..2b1a7bc 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1033,6 +1033,25 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) static unsigned int nr_copy_pages; /* Number of pages needed for saving the original pfns of the image pages */ static unsigned int nr_meta_pages; +/* + * Numbers of normal and highmem page frames allocated for hibernation image + * before suspending devices. + */ +unsigned int alloc_normal, alloc_highmem; +/* + * Memory bitmap used for marking saveable pages (during hibernation) or + * hibernation image pages (during restore) + */ +static struct memory_bitmap orig_bm; +/* + * Memory bitmap used during hibernation for marking allocated page frames that + * will contain copies of saveable pages. During restore it is initially used + * for marking hibernation image pages, but then the set bits from it are + * duplicated in @orig_bm and it is released. On highmem systems it is next + * used for marking "safe" highmem pages, but it has to be reinitialized for + * this purpose. + */ +static struct memory_bitmap copy_bm; /** * swsusp_free - free pages allocated for the suspend. @@ -1064,6 +1083,8 @@ void swsusp_free(void) nr_meta_pages = 0; restore_pblist = NULL; buffer = NULL; + alloc_normal = 0; + alloc_highmem = 0; } /* Helper functions used for the shrinking of memory. */ @@ -1082,8 +1103,16 @@ static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) unsigned long nr_alloc = 0; while (nr_pages > 0) { - if (!alloc_image_page(mask)) + struct page *page; + + page = alloc_image_page(mask); + if (!page) break; + memory_bm_set_bit(©_bm, page_to_pfn(page)); + if (PageHighMem(page)) + alloc_highmem++; + else + alloc_normal++; nr_pages--; nr_alloc++; } @@ -1135,7 +1164,47 @@ static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, #endif /* CONFIG_HIGHMEM */ /** - * swsusp_shrink_memory - Make the kernel release as much memory as needed + * free_unnecessary_pages - Release preallocated pages not needed for the image + */ +static void free_unnecessary_pages(void) +{ + unsigned long save_highmem, to_free_normal, to_free_highmem; + + to_free_normal = alloc_normal - count_data_pages(); + save_highmem = count_highmem_pages(); + if (alloc_highmem > save_highmem) { + to_free_highmem = alloc_highmem - save_highmem; + } else { + to_free_highmem = 0; + to_free_normal -= save_highmem - alloc_highmem; + } + + memory_bm_position_reset(©_bm); + + while (to_free_normal > 0 && to_free_highmem > 0) { + unsigned long pfn = memory_bm_next_pfn(©_bm); + struct page *page = pfn_to_page(pfn); + + if (PageHighMem(page)) { + if (!to_free_highmem) + continue; + to_free_highmem--; + alloc_highmem--; + } else { + if (!to_free_normal) + continue; + to_free_normal--; + alloc_normal--; + } + memory_bm_clear_bit(©_bm, pfn); + swsusp_unset_page_forbidden(page); + swsusp_unset_page_free(page); + __free_page(page); + } +} + +/** + * hibernate_preallocate_memory - Preallocate memory for hibernation image * * To create a hibernation image it is necessary to make a copy of every page * frame in use. We also need a number of page frames to be free during @@ -1154,19 +1223,30 @@ static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, * pages in the system is below the requested image size or it is impossible to * allocate more memory, whichever happens first. */ -int swsusp_shrink_memory(void) +int hibernate_preallocate_memory(void) { struct zone *zone; unsigned long saveable, size, max_size, count, highmem, pages = 0; - unsigned long alloc, pages_highmem; + unsigned long alloc, save_highmem, pages_highmem; struct timeval start, stop; - int error = 0; + int error; - printk(KERN_INFO "PM: Shrinking memory... "); + printk(KERN_INFO "PM: Preallocating image memory... "); do_gettimeofday(&start); + error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); + if (error) + goto err_out; + + error = memory_bm_create(©_bm, GFP_IMAGE, PG_ANY); + if (error) + goto err_out; + + alloc_normal = 0; + alloc_highmem = 0; + /* Count the number of saveable data pages. */ - highmem = count_highmem_pages(); + save_highmem = count_highmem_pages(); saveable = count_data_pages(); /* @@ -1174,7 +1254,8 @@ int swsusp_shrink_memory(void) * number of pages needed for image metadata (size). */ count = saveable; - saveable += highmem; + saveable += save_highmem; + highmem = save_highmem; size = 0; for_each_populated_zone(zone) { size += snapshot_additional_pages(zone); @@ -1193,10 +1274,13 @@ int swsusp_shrink_memory(void) size = max_size; /* * If the maximum is not less than the current number of saveable pages - * in memory, we don't need to do anything more. + * in memory, allocate page frames for the image and we're done. */ - if (size >= saveable) + if (size >= saveable) { + pages = preallocate_image_highmem(save_highmem); + pages += preallocate_image_memory(saveable - pages); goto out; + } /* * Let the memory management subsystem know that we're going to need a @@ -1216,10 +1300,8 @@ int swsusp_shrink_memory(void) pages_highmem = preallocate_image_highmem(highmem / 2); alloc = (count - max_size) - pages_highmem; pages = preallocate_image_memory(alloc); - if (pages < alloc) { - error = -ENOMEM; - goto free_out; - } + if (pages < alloc) + goto err_out; size = max_size - size; alloc = size; size = preallocate_highmem_fraction(size, highmem, count); @@ -1228,21 +1310,24 @@ int swsusp_shrink_memory(void) pages += preallocate_image_memory(alloc); pages += pages_highmem; - free_out: - /* Release all of the preallocated page frames. */ - swsusp_free(); - - if (error) { - printk(KERN_CONT "\n"); - return error; - } + /* + * We only need as many page frames for the image as there are saveable + * pages in memory, but we have allocated more. Release the excessive + * ones now. + */ + free_unnecessary_pages(); out: do_gettimeofday(&stop); - printk(KERN_CONT "done (preallocated %lu free pages)\n", pages); - swsusp_show_speed(&start, &stop, pages, "Freed"); + printk(KERN_CONT "done (allocated %lu pages)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Allocated"); return 0; + + err_out: + printk(KERN_CONT "\n"); + swsusp_free(); + return -ENOMEM; } #ifdef CONFIG_HIGHMEM @@ -1253,7 +1338,7 @@ int swsusp_shrink_memory(void) static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { - unsigned int free_highmem = count_free_highmem_pages(); + unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; if (free_highmem >= nr_highmem) nr_highmem = 0; @@ -1275,19 +1360,17 @@ count_pages_for_highmem(unsigned int nr_highmem) { return 0; } static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) { struct zone *zone; - unsigned int free = 0, meta = 0; + unsigned int free = alloc_normal; - for_each_zone(zone) { - meta += snapshot_additional_pages(zone); + for_each_zone(zone) if (!is_highmem(zone)) free += zone_page_state(zone, NR_FREE_PAGES); - } nr_pages += count_pages_for_highmem(nr_highmem); - pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n", - nr_pages, PAGES_FOR_IO, meta, free); + pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n", + nr_pages, PAGES_FOR_IO, free); - return free > nr_pages + PAGES_FOR_IO + meta; + return free > nr_pages + PAGES_FOR_IO; } #ifdef CONFIG_HIGHMEM @@ -1309,7 +1392,7 @@ static inline int get_highmem_buffer(int safe_needed) */ static inline unsigned int -alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) { unsigned int to_alloc = count_free_highmem_pages(); @@ -1329,7 +1412,7 @@ alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) static inline int get_highmem_buffer(int safe_needed) { return 0; } static inline unsigned int -alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } #endif /* CONFIG_HIGHMEM */ /** @@ -1348,51 +1431,36 @@ static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) { - int error; - - error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); - if (error) - goto Free; - - error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); - if (error) - goto Free; + int error = 0; if (nr_highmem > 0) { error = get_highmem_buffer(PG_ANY); if (error) - goto Free; - - nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); + goto err_out; + if (nr_highmem > alloc_highmem) { + nr_highmem -= alloc_highmem; + nr_pages += alloc_highmem_pages(copy_bm, nr_highmem); + } } - while (nr_pages-- > 0) { - struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); - - if (!page) - goto Free; + if (nr_pages > alloc_normal) { + nr_pages -= alloc_normal; + while (nr_pages-- > 0) { + struct page *page; - memory_bm_set_bit(copy_bm, page_to_pfn(page)); + page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); + if (!page) + goto err_out; + memory_bm_set_bit(copy_bm, page_to_pfn(page)); + } } + return 0; - Free: + err_out: swsusp_free(); - return -ENOMEM; + return error; } -/* Memory bitmap used for marking saveable pages (during suspend) or the - * suspend image pages (during resume) - */ -static struct memory_bitmap orig_bm; -/* Memory bitmap used on suspend for marking allocated pages that will contain - * the copies of saveable pages. During resume it is initially used for - * marking the suspend image pages, but then its set bits are duplicated in - * @orig_bm and it is released. Next, on systems with high memory, it may be - * used for marking "safe" highmem pages, but it has to be reinitialized for - * this purpose. - */ -static struct memory_bitmap copy_bm; - asmlinkage int swsusp_save(void) { unsigned int nr_pages, nr_highmem; -- cgit v1.1 From ef4aede3f10d82adef1fb044b565ba5f08f851e0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 8 Jul 2009 13:24:12 +0200 Subject: PM/Hibernate: Do not try to allocate too much memory too hard (rev. 2) We want to avoid attempting to free too much memory too hard during hibernation, so estimate the minimum size of the image to use as the lower limit for preallocating memory. The approach here is based on the (experimental) observation that we can't free more page frames than the sum of: * global_page_state(NR_SLAB_RECLAIMABLE) * global_page_state(NR_ACTIVE_ANON) * global_page_state(NR_INACTIVE_ANON) * global_page_state(NR_ACTIVE_FILE) * global_page_state(NR_INACTIVE_FILE) minus * global_page_state(NR_FILE_MAPPED) Namely, if this number is subtracted from the number of saveable pages in the system, we get a good estimate of the minimum reasonable size of a hibernation image. Signed-off-by: Rafael J. Wysocki Acked-by: Wu Fengguang --- kernel/power/snapshot.c | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 2b1a7bc..0a06b11 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1204,6 +1204,36 @@ static void free_unnecessary_pages(void) } /** + * minimum_image_size - Estimate the minimum acceptable size of an image + * @saveable: Number of saveable pages in the system. + * + * We want to avoid attempting to free too much memory too hard, so estimate the + * minimum acceptable size of a hibernation image to use as the lower limit for + * preallocating memory. + * + * We assume that the minimum image size should be proportional to + * + * [number of saveable pages] - [number of pages that can be freed in theory] + * + * where the second term is the sum of (1) reclaimable slab pages, (2) active + * and (3) inactive anonymouns pages, (4) active and (5) inactive file pages, + * minus mapped file pages. + */ +static unsigned long minimum_image_size(unsigned long saveable) +{ + unsigned long size; + + size = global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_ACTIVE_ANON) + + global_page_state(NR_INACTIVE_ANON) + + global_page_state(NR_ACTIVE_FILE) + + global_page_state(NR_INACTIVE_FILE) + - global_page_state(NR_FILE_MAPPED); + + return saveable <= size ? 0 : saveable - size; +} + +/** * hibernate_preallocate_memory - Preallocate memory for hibernation image * * To create a hibernation image it is necessary to make a copy of every page @@ -1220,8 +1250,8 @@ static void free_unnecessary_pages(void) * * If image_size is set below the number following from the above formula, * the preallocation of memory is continued until the total number of saveable - * pages in the system is below the requested image size or it is impossible to - * allocate more memory, whichever happens first. + * pages in the system is below the requested image size or the minimum + * acceptable image size returned by minimum_image_size(), whichever is greater. */ int hibernate_preallocate_memory(void) { @@ -1282,6 +1312,11 @@ int hibernate_preallocate_memory(void) goto out; } + /* Estimate the minimum size of the image. */ + pages = minimum_image_size(saveable); + if (size < pages) + size = min_t(unsigned long, pages, max_size); + /* * Let the memory management subsystem know that we're going to need a * large number of page frames to allocate and make it free some memory. @@ -1294,8 +1329,8 @@ int hibernate_preallocate_memory(void) * The number of saveable pages in memory was too high, so apply some * pressure to decrease it. First, make room for the largest possible * image and fail if that doesn't work. Next, try to decrease the size - * of the image as much as indicated by image_size using allocations - * from highmem and non-highmem zones separately. + * of the image as much as indicated by 'size' using allocations from + * highmem and non-highmem zones separately. */ pages_highmem = preallocate_image_highmem(highmem / 2); alloc = (count - max_size) - pages_highmem; -- cgit v1.1 From 98e73dc5d2dadfcb95305ad71ac9239f4e361870 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 22 Jul 2009 00:36:56 +0200 Subject: PM / Hibernate / Memory hotplug: Always use for_each_populated_zone() Use for_each_populated_zone() instead of for_each_zone() in hibernation code. This fixes a bug on s390, where we allow both config options HIBERNATION and MEMORY_HOTPLUG, so that we also have a ZONE_MOVABLE here. We only allow hibernation if no memory hotplug operation was performed, so in fact both features can only be used exclusively, but this way we don't need 2 differently configured (distribution) kernels. If we have an unpopulated ZONE_MOVABLE, we allow hibernation but run into a BUG_ON() in memory_bm_test/set/clear_bit() because hibernation code iterates through all zones, not only the populated zones, in several places. For example, swsusp_free() does for_each_zone() and then checks for pfn_valid(), which is true even if the zone is not populated, resulting in a BUG_ON() later because the pfn cannot be found in the memory bitmap. Replacing all occurences of for_each_zone() in hibernation code with for_each_populated_zone() would fix this issue. [rjw: Rebased on top of linux-next hibernation patches.] Signed-off-by: Gerald Schaefer Acked-by: KOSAKI Motohiro Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0a06b11..bf06658 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -853,7 +853,7 @@ static unsigned int count_highmem_pages(void) struct zone *zone; unsigned int n = 0; - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long pfn, max_zone_pfn; if (!is_highmem(zone)) @@ -916,7 +916,7 @@ static unsigned int count_data_pages(void) unsigned long pfn, max_zone_pfn; unsigned int n = 0; - for_each_zone(zone) { + for_each_populated_zone(zone) { if (is_highmem(zone)) continue; @@ -1010,7 +1010,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) struct zone *zone; unsigned long pfn; - for_each_zone(zone) { + for_each_populated_zone(zone) { unsigned long max_zone_pfn; mark_free_pages(zone); @@ -1065,7 +1065,7 @@ void swsusp_free(void) struct zone *zone; unsigned long pfn, max_zone_pfn; - for_each_zone(zone) { + for_each_populated_zone(zone) { max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) { @@ -1397,7 +1397,7 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) struct zone *zone; unsigned int free = alloc_normal; - for_each_zone(zone) + for_each_populated_zone(zone) if (!is_highmem(zone)) free += zone_page_state(zone, NR_FREE_PAGES); @@ -1688,7 +1688,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) unsigned long pfn, max_zone_pfn; /* Clear page flags */ - for_each_zone(zone) { + for_each_populated_zone(zone) { max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) -- cgit v1.1 From 8de0307326be94148436082a9abf365da8e3c66d Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 22 Jul 2009 19:56:10 +0200 Subject: PM: Trivial fixes Fix the definition of BM_BITS_PER_BLOCK and kerneldoc description of create_bm_block_list(). [rjw: Added changelog.] Signed-off-by: Wu Fengguang Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index bf06658..97955b0 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -233,7 +233,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) #define BM_END_OF_MAP (~0UL) -#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) +#define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) struct bm_block { struct list_head hook; /* hook into a list of bitmap blocks */ @@ -275,7 +275,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); /** * create_bm_block_list - create a list of block bitmap objects - * @nr_blocks - number of blocks to allocate + * @pages - number of pages to track * @list - list to put the allocated blocks into * @ca - chain allocator to be used for allocating memory */ -- cgit v1.1