From 30fe6884021b9fa0124609e898a6341be188eb44 Mon Sep 17 00:00:00 2001 From: Sandeep Tripathy Date: Wed, 2 Jul 2014 15:00:58 +0530 Subject: cpuidle: move idle traces to cpuidle_enter_state() idle_exit event is the first event after a core exits idle state. So this should be traced before local irq is enabled. Likewise idle_entry is the last event before a core enters idle state. This will ease visualising the cpu idle state from kernel traces. Signed-off-by: Sandeep Tripathy Acked-by: Daniel Lezcano [rjw: Subject, rebase] Signed-off-by: Rafael J. Wysocki --- kernel/sched/idle.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index cf009fb..658a58d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -147,8 +147,6 @@ use_default: clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) goto use_default; - trace_cpu_idle_rcuidle(next_state, dev->cpu); - /* * Enter the idle state previously returned by the governor decision. * This function will block until an interrupt occurs and will take @@ -156,8 +154,6 @@ use_default: */ entered_state = cpuidle_enter(drv, dev, next_state); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); - if (broadcast) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); -- cgit v1.1
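Because this log is limited to 'kernel', only the removal side of the move is visible; the matching hunk adds the two tracepoints to cpuidle_enter_state() in drivers/cpuidle/cpuidle.c. A rough sketch of how that function looks after the change, based on the commit message (the real function's residency accounting is elided):

/* Sketch only: where the relocated tracepoints sit in cpuidle_enter_state();
 * abridged, not the full function body. */
int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
			int index)
{
	struct cpuidle_state *target_state = &drv->states[index];
	ktime_t time_start, time_end;
	int entered_state;

	trace_cpu_idle_rcuidle(index, dev->cpu);	/* last event before idle */
	time_start = ktime_get();

	entered_state = target_state->enter(dev, drv, index);

	time_end = ktime_get();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* first event after idle */

	/* idle_exit is traced above, before interrupts come back on. */
	if (!cpuidle_state_is_coupled(dev, drv, entered_state))
		local_irq_enable();

	/* ... residency accounting elided ... */
	return entered_state;
}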
From d431cbc53cb787a7f82d7d2fe0af65156db4d27a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 15 Jul 2014 22:02:11 +0200 Subject: PM / sleep: Simplify sleep states sysfs interface code Simplify the sleep states sysfs interface /sys/power/state code by redefining pm_states[] as an array of pointers to constant strings such that only the entries corresponding to valid states are set. Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 21 +++++++++++---------- kernel/power/power.h | 7 +------ kernel/power/suspend.c | 32 +++++++++++++------------------- kernel/power/suspend_test.c | 12 ++++++------ 4 files changed, 31 insertions(+), 41 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 8e90f33..d57f66a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -296,8 +296,8 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, suspend_state_t i; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) - if (pm_states[i].state) - s += sprintf(s,"%s ", pm_states[i].label); + if (pm_states[i]) + s += sprintf(s,"%s ", pm_states[i]); #endif if (hibernation_available()) @@ -311,8 +311,7 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, static suspend_state_t decode_state(const char *buf, size_t n) { #ifdef CONFIG_SUSPEND - suspend_state_t state = PM_SUSPEND_MIN; - struct pm_sleep_state *s; + suspend_state_t state; #endif char *p; int len; @@ -325,10 +324,12 @@ static suspend_state_t decode_state(const char *buf, size_t n) return PM_SUSPEND_MAX; #ifdef CONFIG_SUSPEND - for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) - if (s->state && len == strlen(s->label) - && !strncmp(buf, s->label, len)) - return s->state; + for (state = PM_SUSPEND_MIN; state < PM_SUSPEND_MAX; state++) { + const char *label = pm_states[state]; + + if (label && len == strlen(label) && !strncmp(buf, label, len)) + return state; + } #endif return PM_SUSPEND_ON; @@ -446,8 +447,8 @@ static ssize_t autosleep_show(struct kobject *kobj, #ifdef CONFIG_SUSPEND if (state < PM_SUSPEND_MAX) - return sprintf(buf, "%s\n", pm_states[state].state ? - pm_states[state].label : "error"); + return sprintf(buf, "%s\n", pm_states[state] ? + pm_states[state] : "error"); #endif #ifdef CONFIG_HIBERNATION return sprintf(buf, "disk\n"); diff --git a/kernel/power/power.h b/kernel/power/power.h index c60f13b..5d49dca 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -178,13 +178,8 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); #ifdef CONFIG_SUSPEND -struct pm_sleep_state { - const char *label; - suspend_state_t state; -}; - /* kernel/power/suspend.c */ -extern struct pm_sleep_state pm_states[]; +extern const char *pm_states[]; extern int suspend_devices_and_enter(suspend_state_t state); #else /* !CONFIG_SUSPEND */ diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index ed35a47..83f5b3e 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -31,11 +31,8 @@ #include "power.h" -struct pm_sleep_state pm_states[PM_SUSPEND_MAX] = { - [PM_SUSPEND_FREEZE] = { .label = "freeze", .state = PM_SUSPEND_FREEZE }, - [PM_SUSPEND_STANDBY] = { .label = "standby", }, - [PM_SUSPEND_MEM] = { .label = "mem", }, -}; +static const char *pm_labels[] = { "mem", "standby", "freeze", }; +const char *pm_states[PM_SUSPEND_MAX]; static const struct platform_suspend_ops *suspend_ops; static const struct platform_freeze_ops *freeze_ops; @@ -97,10 +94,7 @@ static bool relative_states; static int __init sleep_states_setup(char *str) { relative_states = !strncmp(str, "1", 1); - if (relative_states) { - pm_states[PM_SUSPEND_MEM].state = PM_SUSPEND_FREEZE; - pm_states[PM_SUSPEND_FREEZE].state = 0; - } + pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2]; return 1; } @@ -113,20 +107,20 @@ __setup("relative_sleep_states=", sleep_states_setup); void suspend_set_ops(const struct platform_suspend_ops *ops) { suspend_state_t i; - int j = PM_SUSPEND_MAX - 1; + int j = 0; lock_system_sleep(); suspend_ops = ops; for (i = PM_SUSPEND_MEM; i >= PM_SUSPEND_STANDBY; i--) - if (valid_state(i)) - pm_states[j--].state = i; - else if (!relative_states) - pm_states[j--].state = 0; + if (valid_state(i)) { + pm_states[i] = pm_labels[j++]; + } else if (!relative_states) { + pm_states[i] = NULL; + j++; + } - pm_states[j--].state = PM_SUSPEND_FREEZE; - while (j >= PM_SUSPEND_MIN) - pm_states[j--].state = 0; + pm_states[PM_SUSPEND_FREEZE] = pm_labels[j]; unlock_system_sleep(); } @@ -395,7 +389,7 @@ static int enter_state(suspend_state_t state) printk("done.\n"); trace_suspend_resume(TPS("sync_filesystems"), 0, false); - pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label); + pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); error = suspend_prepare(state); if (error) goto Unlock; @@ -404,7 +398,7 @@ static int enter_state(suspend_state_t state) goto Finish; trace_suspend_resume(TPS("suspend_enter"), state, false); - pr_debug("PM: Entering %s sleep\n", pm_states[state].label); + pr_debug("PM: Entering %s sleep\n", pm_states[state]); pm_restrict_gfp_mask(); error = suspend_devices_and_enter(state); pm_restore_gfp_mask(); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c index 269b097..2f524928 100644 --- a/kernel/power/suspend_test.c +++ b/kernel/power/suspend_test.c @@ -92,13 +92,13 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) } if (state == PM_SUSPEND_MEM) { - printk(info_test, pm_states[state].label); + printk(info_test, pm_states[state]); status = pm_suspend(state); if (status == -ENODEV) state = PM_SUSPEND_STANDBY; } if (state == PM_SUSPEND_STANDBY) { - printk(info_test, pm_states[state].label); + printk(info_test, pm_states[state]); status = pm_suspend(state); } if (status < 0) @@ -141,8 +141,8 @@ static int __init setup_test_suspend(char *value) /* "=mem" ==> "mem" */ value++; for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) - if (!strcmp(pm_states[i].label, value)) { - test_state = pm_states[i].state; + if (!strcmp(pm_states[i], value)) { + test_state = i; return 0; } @@ -162,8 +162,8 @@ static int __init test_suspend(void) /* PM is initialized by now; is that state testable? */ if (test_state == PM_SUSPEND_ON) goto done; - if (!pm_states[test_state].state) { - printk(warn_bad_state, pm_states[test_state].label); + if (!pm_states[test_state]) { + printk(warn_bad_state, pm_states[test_state]); goto done; } -- cgit v1.1
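The label bookkeeping in suspend_set_ops() is compact but subtle: pm_labels[] is handed out starting from the deepest valid state, and a NULL entry in pm_states[] marks a state the platform cannot enter. Below is a standalone userspace simulation of that hand-out logic, assuming the suspend_state_t values from include/linux/suspend.h; the valid_state() answers are made-up inputs for illustration:

#include <stdio.h>

/* Mirrors the suspend_state_t values from include/linux/suspend.h. */
enum {
	PM_SUSPEND_ON = 0,
	PM_SUSPEND_FREEZE = 1,
	PM_SUSPEND_MIN = PM_SUSPEND_FREEZE,
	PM_SUSPEND_STANDBY = 2,
	PM_SUSPEND_MEM = 3,
	PM_SUSPEND_MAX = 4,
};

static const char *pm_labels[] = { "mem", "standby", "freeze" };
static const char *pm_states[PM_SUSPEND_MAX];

/* Same label hand-out as suspend_set_ops(); standby_valid/mem_valid stand
 * in for the platform's valid_state() answers. */
static void set_ops(int relative_states, int standby_valid, int mem_valid)
{
	int i, j = 0;

	for (i = PM_SUSPEND_MEM; i >= PM_SUSPEND_STANDBY; i--) {
		int valid = (i == PM_SUSPEND_MEM) ? mem_valid : standby_valid;

		if (valid) {
			pm_states[i] = pm_labels[j++];
		} else if (!relative_states) {
			pm_states[i] = NULL;
			j++;
		}
	}
	pm_states[PM_SUSPEND_FREEZE] = pm_labels[j];
}

static void show(void)
{
	int i;

	for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
		printf("%s ", pm_states[i] ? pm_states[i] : "(invalid)");
	printf("\n");
}

int main(void)
{
	set_ops(0, 0, 1);	/* no standby: prints "freeze (invalid) mem" */
	show();
	set_ops(1, 0, 1);	/* relative_sleep_states=1: labels shift down,
				   so freeze is now reached via "standby" */
	show();
	return 0;
}

With relative_sleep_states=1 the labels describe relative depth rather than fixed states, which is why the second run labels the freeze state "standby" on a platform that cannot do real standby.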
From 78c5e0bb145d3eac719fcad1ac1df763a71cf632 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 22 Jul 2014 15:43:12 +0100 Subject: PM / OPP: Remove ARCH_HAS_OPP Since the OPP layer is a kernel library which has been converted to be directly selectable by its callers rather than user selectable and requiring architectures to enable it explicitly, the ARCH_HAS_OPP symbol has become redundant and can be removed. Do so. Signed-off-by: Mark Brown Reviewed-by: Viresh Kumar Acked-by: Nishanth Menon Acked-by: Rob Herring Acked-by: Shawn Guo Acked-by: Simon Horman Signed-off-by: Rafael J. Wysocki --- kernel/power/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 9a83d78..e4e4121 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -253,9 +253,6 @@ config APM_EMULATION anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). -config ARCH_HAS_OPP - bool - config PM_OPP bool ---help--- -- cgit v1.1 From 8490fdf923fc6cf6c31a53b73cafdf582a9642f0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Jul 2014 00:57:53 +0200 Subject: PM / sleep: Move platform suspend operations to separate functions After the introduction of freeze_ops it makes more sense to move all of the platform suspend operations to separate functions that each will do all of the necessary checks and choose the right callback to execute instead of doing all that in the core code, which makes it generally harder to follow. Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 120 +++++++++++++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 43 deletions(-) (limited to 'kernel') diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 83f5b3e..9a071be 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -36,12 +36,6 @@ const char *pm_states[PM_SUSPEND_MAX]; static const struct platform_suspend_ops *suspend_ops; static const struct platform_freeze_ops *freeze_ops; - -static bool need_suspend_ops(suspend_state_t state) -{ - return state > PM_SUSPEND_FREEZE; -} - static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head); static bool suspend_freeze_wake; @@ -139,6 +133,65 @@ int suspend_valid_only_mem(suspend_state_t state) } EXPORT_SYMBOL_GPL(suspend_valid_only_mem); +static bool sleep_state_supported(suspend_state_t state) +{ + return state == PM_SUSPEND_FREEZE || (suspend_ops && suspend_ops->enter); +} + +static int platform_suspend_prepare(suspend_state_t state) +{ + return state != PM_SUSPEND_FREEZE && suspend_ops->prepare ?
+ suspend_ops->prepare() : 0; +} + +static int platform_suspend_prepare_late(suspend_state_t state) +{ + return state != PM_SUSPEND_FREEZE && suspend_ops->prepare_late ? + suspend_ops->prepare_late() : 0; +} + +static void platform_suspend_wake(suspend_state_t state) +{ + if (state != PM_SUSPEND_FREEZE && suspend_ops->wake) + suspend_ops->wake(); +} + +static void platform_suspend_finish(suspend_state_t state) +{ + if (state != PM_SUSPEND_FREEZE && suspend_ops->finish) + suspend_ops->finish(); +} + +static int platform_suspend_begin(suspend_state_t state) +{ + if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) + return freeze_ops->begin(); + else if (suspend_ops->begin) + return suspend_ops->begin(state); + else + return 0; +} + +static void platform_suspend_end(suspend_state_t state) +{ + if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end) + freeze_ops->end(); + else if (suspend_ops->end) + suspend_ops->end(); +} + +static void platform_suspend_recover(suspend_state_t state) +{ + if (state != PM_SUSPEND_FREEZE && suspend_ops->recover) + suspend_ops->recover(); +} + +static bool platform_suspend_again(suspend_state_t state) +{ + return state != PM_SUSPEND_FREEZE && suspend_ops->suspend_again ? + suspend_ops->suspend_again() : false; +} + static int suspend_test(int level) { #ifdef CONFIG_PM_DEBUG @@ -162,7 +215,7 @@ static int suspend_prepare(suspend_state_t state) { int error; - if (need_suspend_ops(state) && (!suspend_ops || !suspend_ops->enter)) + if (!sleep_state_supported(state)) return -EPERM; pm_prepare_console(); @@ -208,23 +261,18 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) { int error; - if (need_suspend_ops(state) && suspend_ops->prepare) { - error = suspend_ops->prepare(); - if (error) - goto Platform_finish; - } + error = platform_suspend_prepare(state); + if (error) + goto Platform_finish; error = dpm_suspend_end(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); goto Platform_finish; } - - if (need_suspend_ops(state) && suspend_ops->prepare_late) { - error = suspend_ops->prepare_late(); - if (error) - goto Platform_wake; - } + error = platform_suspend_prepare_late(state); + if (error) + goto Platform_wake; if (suspend_test(TEST_PLATFORM)) goto Platform_wake; @@ -272,15 +320,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) ftrace_start(); Platform_wake: - if (need_suspend_ops(state) && suspend_ops->wake) - suspend_ops->wake(); - + platform_suspend_wake(state); dpm_resume_start(PMSG_RESUME); Platform_finish: - if (need_suspend_ops(state) && suspend_ops->finish) - suspend_ops->finish(); - + platform_suspend_finish(state); return error; } @@ -293,18 +337,13 @@ int suspend_devices_and_enter(suspend_state_t state) int error; bool wakeup = false; - if (need_suspend_ops(state) && !suspend_ops) + if (!sleep_state_supported(state)) return -ENOSYS; - if (need_suspend_ops(state) && suspend_ops->begin) { - error = suspend_ops->begin(state); - if (error) - goto Close; - } else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) { - error = freeze_ops->begin(); - if (error) - goto Close; - } + error = platform_suspend_begin(state); + if (error) + goto Close; + suspend_console(); suspend_test_start(); error = dpm_suspend_start(PMSG_SUSPEND); @@ -318,25 +357,20 @@ int suspend_devices_and_enter(suspend_state_t state) do { error = suspend_enter(state, &wakeup); - } while (!error && !wakeup && need_suspend_ops(state) - && suspend_ops->suspend_again && 
suspend_ops->suspend_again()); + } while (!error && !wakeup && platform_suspend_again(state)); Resume_devices: suspend_test_start(); dpm_resume_end(PMSG_RESUME); suspend_test_finish("resume devices"); resume_console(); - Close: - if (need_suspend_ops(state) && suspend_ops->end) - suspend_ops->end(); - else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end) - freeze_ops->end(); + Close: + platform_suspend_end(state); return error; Recover_platform: - if (need_suspend_ops(state) && suspend_ops->recover) - suspend_ops->recover(); + platform_suspend_recover(state); goto Resume_devices; } -- cgit v1.1 From 28cb5ef16e578bbca0a562b09f12c8c98ca92720 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Jul 2014 01:00:36 +0200 Subject: PM: Create PM workqueue if runtime PM is not configured too The PM workqueue is going to be used by ACPI PM notify handlers regardless of whether or not runtime PM is configured, so move it out of #ifdef CONFIG_PM_RUNTIME. Do that in three places in the ACPI device PM code. Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 8e90f33..a18efed 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -615,7 +615,6 @@ static struct attribute_group attr_group = { .attrs = g, }; -#ifdef CONFIG_PM_RUNTIME struct workqueue_struct *pm_wq; EXPORT_SYMBOL_GPL(pm_wq); @@ -625,9 +624,6 @@ static int __init pm_start_workqueue(void) return pm_wq ? 0 : -ENOMEM; } -#else -static inline int pm_start_workqueue(void) { return 0; } -#endif static int __init pm_init(void) { -- cgit v1.1 From f469f02dc6fa67f6c6a7d91400d08b9339147aed Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:26:57 +0200 Subject: PM / Hibernate: Create a Radix-Tree to store memory bitmap This patch adds the code to allocate and build the radix tree to store the memory bitmap. The old data structure is left in place until the radix tree implementation is finished. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 222 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 1ea328a..5a0eafd 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -248,11 +248,24 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) * information is stored (in the form of a block of bitmap) * It also contains the pfns that correspond to the start and end of * the represented memory area. + * + * The memory bitmap is organized as a radix tree to guarantee fast random + * access to the bits. There is one radix tree for each zone (as returned + * from create_mem_extents). + * + * One radix tree is represented by one struct mem_zone_bm_rtree. There are + * two linked lists for the nodes of the tree, one for the inner nodes and + * one for the leave nodes. The linked leave nodes are used for fast linear + * access of the memory bitmap. + * + * The struct rtree_node represents one node of the radix tree. 
*/ #define BM_END_OF_MAP (~0UL) #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE) +#define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) +#define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) struct bm_block { struct list_head hook; /* hook into a list of bitmap blocks */ @@ -266,6 +279,31 @@ static inline unsigned long bm_block_bits(struct bm_block *bb) return bb->end_pfn - bb->start_pfn; } +/* + * struct rtree_node is a wrapper struct to link the nodes + * of the rtree together for easy linear iteration over + * bits and easy freeing + */ +struct rtree_node { + struct list_head list; + unsigned long *data; +}; + +/* + * struct mem_zone_bm_rtree represents a bitmap used for one + * populated memory zone. + */ +struct mem_zone_bm_rtree { + struct list_head list; /* Link Zones together */ + struct list_head nodes; /* Radix Tree inner nodes */ + struct list_head leaves; /* Radix Tree leaves */ + unsigned long start_pfn; /* Zone start page frame */ + unsigned long end_pfn; /* Zone end page frame + 1 */ + struct rtree_node *rtree; /* Radix Tree Root */ + int levels; /* Number of Radix Tree Levels */ + unsigned int blocks; /* Number of Bitmap Blocks */ +}; + /* strcut bm_position is used for browsing memory bitmaps */ struct bm_position { @@ -274,6 +312,7 @@ struct bm_position { }; struct memory_bitmap { + struct list_head zones; struct list_head blocks; /* list of bitmap blocks */ struct linked_page *p_list; /* list of pages used to store zone * bitmap objects and bitmap block @@ -284,6 +323,166 @@ struct memory_bitmap { /* Functions that operate on memory bitmaps */ +#define BM_ENTRIES_PER_LEVEL (PAGE_SIZE / sizeof(unsigned long)) +#if BITS_PER_LONG == 32 +#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 2) +#else +#define BM_RTREE_LEVEL_SHIFT (PAGE_SHIFT - 3) +#endif +#define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) + +/* + * alloc_rtree_node - Allocate a new node and add it to the radix tree. + * + * This function is used to allocate inner nodes as well as the + * leave nodes of the radix tree. It also adds the node to the + * corresponding linked list passed in by the *list parameter. + */ +static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, + struct chain_allocator *ca, + struct list_head *list) +{ + struct rtree_node *node; + + node = chain_alloc(ca, sizeof(struct rtree_node)); + if (!node) + return NULL; + + node->data = get_image_page(gfp_mask, safe_needed); + if (!node->data) + return NULL; + + list_add_tail(&node->list, list); + + return node; +} + +/* + * add_rtree_block - Add a new leave node to the radix tree + * + * The leave nodes need to be allocated in order to keep the leaves + * linked list in order. This is guaranteed by the zone->blocks + * counter. + */ +static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, + int safe_needed, struct chain_allocator *ca) +{ + struct rtree_node *node, *block, **dst; + unsigned int levels_needed, block_nr; + int i; + + block_nr = zone->blocks; + levels_needed = 0; + + /* How many levels do we need for this block nr? 
*/ + while (block_nr) { + levels_needed += 1; + block_nr >>= BM_RTREE_LEVEL_SHIFT; + } + + /* Make sure the rtree has enough levels */ + for (i = zone->levels; i < levels_needed; i++) { + node = alloc_rtree_node(gfp_mask, safe_needed, ca, + &zone->nodes); + if (!node) + return -ENOMEM; + + node->data[0] = (unsigned long)zone->rtree; + zone->rtree = node; + zone->levels += 1; + } + + /* Allocate new block */ + block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves); + if (!block) + return -ENOMEM; + + /* Now walk the rtree to insert the block */ + node = zone->rtree; + dst = &zone->rtree; + block_nr = zone->blocks; + for (i = zone->levels; i > 0; i--) { + int index; + + if (!node) { + node = alloc_rtree_node(gfp_mask, safe_needed, ca, + &zone->nodes); + if (!node) + return -ENOMEM; + *dst = node; + } + + index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); + index &= BM_RTREE_LEVEL_MASK; + dst = (struct rtree_node **)&((*dst)->data[index]); + node = *dst; + } + + zone->blocks += 1; + *dst = block; + + return 0; +} + +static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, + int clear_nosave_free); + +/* + * create_zone_bm_rtree - create a radix tree for one zone + * + * Allocated the mem_zone_bm_rtree structure and initializes it. + * This function also allocated and builds the radix tree for the + * zone. + */ +static struct mem_zone_bm_rtree * +create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, + struct chain_allocator *ca, + unsigned long start, unsigned long end) +{ + struct mem_zone_bm_rtree *zone; + unsigned int i, nr_blocks; + unsigned long pages; + + pages = end - start; + zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree)); + if (!zone) + return NULL; + + INIT_LIST_HEAD(&zone->nodes); + INIT_LIST_HEAD(&zone->leaves); + zone->start_pfn = start; + zone->end_pfn = end; + nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); + + for (i = 0; i < nr_blocks; i++) { + if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) { + free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR); + return NULL; + } + } + + return zone; +} + +/* + * free_zone_bm_rtree - Free the memory of the radix tree + * + * Free all node pages of the radix tree. The mem_zone_bm_rtree + * structure itself is not freed here nor are the rtree_node + * structs. 
+ */ +static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, + int clear_nosave_free) +{ + struct rtree_node *node; + + list_for_each_entry(node, &zone->nodes, list) + free_image_page(node->data, clear_nosave_free); + + list_for_each_entry(node, &zone->leaves, list) + free_image_page(node->data, clear_nosave_free); +} + static void memory_bm_position_reset(struct memory_bitmap *bm) { bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); @@ -408,12 +607,14 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) chain_init(&ca, gfp_mask, safe_needed); INIT_LIST_HEAD(&bm->blocks); + INIT_LIST_HEAD(&bm->zones); error = create_mem_extents(&mem_extents, gfp_mask); if (error) return error; list_for_each_entry(ext, &mem_extents, hook) { + struct mem_zone_bm_rtree *zone; struct bm_block *bb; unsigned long pfn = ext->start; unsigned long pages = ext->end - ext->start; @@ -441,6 +642,12 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) } bb->end_pfn = pfn; } + + zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, + ext->start, ext->end); + if (!zone) + goto Error; + list_add_tail(&zone->list, &bm->zones); } bm->p_list = ca.chain; @@ -460,14 +667,19 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) */ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) { + struct mem_zone_bm_rtree *zone; struct bm_block *bb; list_for_each_entry(bb, &bm->blocks, hook) if (bb->data) free_image_page(bb->data, clear_nosave_free); + list_for_each_entry(zone, &bm->zones, list) + free_zone_bm_rtree(zone, clear_nosave_free); + free_list_of_pages(bm->p_list, clear_nosave_free); + INIT_LIST_HEAD(&bm->zones); INIT_LIST_HEAD(&bm->blocks); } @@ -816,12 +1028,21 @@ void free_basic_memory_bitmaps(void) unsigned int snapshot_additional_pages(struct zone *zone) { + unsigned int rtree, nodes; unsigned int res; res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); res += DIV_ROUND_UP(res * sizeof(struct bm_block), LINKED_PAGE_DATA_SIZE); - return 2 * res; + rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); + rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), + LINKED_PAGE_DATA_SIZE); + while (nodes > 1) { + nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL); + rtree += nodes; + } + + return 2 * (res + rtree); } #ifdef CONFIG_HIGHMEM -- cgit v1.1
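The tree geometry implied by these constants is easiest to see with concrete numbers. The sketch below (standalone userspace C, assuming 4 KiB pages on a 64-bit build, so one leaf page covers 2^15 pfns and one inner node holds 512 pointers) reproduces the level count and the per-level index arithmetic used by add_rtree_block(); the pfn is an arbitrary example:

#include <stdio.h>

/* Assumption: 4 KiB pages, 64-bit longs. Then BM_BLOCK_SHIFT = PAGE_SHIFT + 3
 * = 15 (one bitmap page holds 2^15 bits) and BM_RTREE_LEVEL_SHIFT =
 * PAGE_SHIFT - 3 = 9 (512 node pointers per inner-node page). */
#define BM_BLOCK_SHIFT		15
#define BM_RTREE_LEVEL_SHIFT	9
#define BM_RTREE_LEVEL_MASK	((1UL << BM_RTREE_LEVEL_SHIFT) - 1)

int main(void)
{
	unsigned long pfn = 0x12345678;	/* arbitrary example pfn */
	unsigned long block_nr = pfn >> BM_BLOCK_SHIFT;
	unsigned long n = block_nr;
	int levels = 0, i;

	/* Same levels computation as add_rtree_block(). */
	while (n) {
		levels++;
		n >>= BM_RTREE_LEVEL_SHIFT;
	}
	printf("pfn %#lx -> block %lu, %d tree level(s)\n", pfn, block_nr, levels);

	/* Per-level indices, exactly as the tree walk derives them. */
	for (i = levels; i > 0; i--)
		printf("level %d index: %lu\n", i,
		       (block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT)) &
		       BM_RTREE_LEVEL_MASK);

	printf("bit within leaf: %lu\n", pfn & ((1UL << BM_BLOCK_SHIFT) - 1));
	return 0;
}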
From 07a338236fdcd6caf41541dcdf879f5758020ab1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:26:58 +0200 Subject: PM / Hibernate: Add memory_rtree_find_bit function Add a function to find a bit in the radix tree for a given pfn. Also add code to the memory bitmap wrapper functions to use the radix tree together with the existing memory bitmap implementation. On read accesses compare the results of both bitmaps to make sure the radix tree behaves the same way. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5a0eafd..0b7f934 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -720,6 +720,56 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, return 0; } +/* + * memory_rtree_find_bit - Find the bit for pfn in the memory + * bitmap + * + * Walks the radix tree to find the page which contains the bit for + * pfn and returns the bit position in **addr and *bit_nr. + */ +static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) +{ + struct mem_zone_bm_rtree *curr, *zone; + struct rtree_node *node; + int i, block_nr; + + zone = NULL; + + /* Find the right zone */ + list_for_each_entry(curr, &bm->zones, list) { + if (pfn >= curr->start_pfn && pfn < curr->end_pfn) { + zone = curr; + break; + } + } + + if (!zone) + return -EFAULT; + + /* + * We have a zone. Now walk the radix tree to find the leave + * node for our pfn. + */ + node = zone->rtree; + block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; + + for (i = zone->levels; i > 0; i--) { + int index; + + index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT); + index &= BM_RTREE_LEVEL_MASK; + BUG_ON(node->data[index] == 0); + node = (struct rtree_node *)node->data[index]; + } + + /* Set return values */ + *addr = node->data; + *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; + + return 0; +} + static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; @@ -729,6 +779,10 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); set_bit(bit, addr); + + error = memory_rtree_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + set_bit(bit, addr); } static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) @@ -740,6 +794,13 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); if (!error) set_bit(bit, addr); + else + return error; + + error = memory_rtree_find_bit(bm, pfn, &addr, &bit); + if (!error) + set_bit(bit, addr); + return error; } @@ -752,25 +813,42 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); clear_bit(bit, addr); + + error = memory_rtree_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error); + clear_bit(bit, addr); } static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; - int error; + int error, error2; + int v; error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); - return test_bit(bit, addr); + v = test_bit(bit, addr); + + error2 = memory_rtree_find_bit(bm, pfn, &addr, &bit); + BUG_ON(error2); + + WARN_ON_ONCE(v != test_bit(bit, addr)); + + return v; } static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; + int present; + + present = !memory_bm_find_bit(bm, pfn, &addr, &bit); + + WARN_ON_ONCE(present != !memory_rtree_find_bit(bm, pfn, &addr, &bit)); - return !memory_bm_find_bit(bm, pfn, &addr, &bit); + return present; } /** -- cgit v1.1 From 3a20cb1779616ebcaade393cc9beac0e03cbffef Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:26:59 +0200 Subject: PM / Hibernate: Implement
position keeping in radix tree Add code to remember the last position that was requested in the radix tree. Use it as a cache for faster linear walking of the bitmap in the memory_bm_rtree_next_pfn() function which is also added with this patch. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0b7f934..802f241 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -309,6 +309,11 @@ struct mem_zone_bm_rtree { struct bm_position { struct bm_block *block; int bit; + + struct mem_zone_bm_rtree *zone; + struct rtree_node *node; + unsigned long node_pfn; + int node_bit; }; struct memory_bitmap { @@ -487,6 +492,13 @@ static void memory_bm_position_reset(struct memory_bitmap *bm) { bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); bm->cur.bit = 0; + + bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, + list); + bm->cur.node = list_entry(bm->cur.zone->leaves.next, + struct rtree_node, list); + bm->cur.node_pfn = 0; + bm->cur.node_bit = 0; } static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); @@ -734,6 +746,11 @@ static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, struct rtree_node *node; int i, block_nr; + zone = bm->cur.zone; + + if (pfn >= zone->start_pfn && pfn < zone->end_pfn) + goto zone_found; + zone = NULL; /* Find the right zone */ @@ -747,10 +764,16 @@ static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, if (!zone) return -EFAULT; +zone_found: /* * We have a zone. Now walk the radix tree to find the leave * node for our pfn. */ + + node = bm->cur.node; + if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) + goto node_found; + node = zone->rtree; block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT; @@ -763,6 +786,12 @@ static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, node = (struct rtree_node *)node->data[index]; } +node_found: + /* Update last position */ + bm->cur.zone = zone; + bm->cur.node = node; + bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK; + /* Set return values */ *addr = node->data; *bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK; @@ -860,11 +889,16 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) * this function. */ +static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm); + static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) { + unsigned long rtree_pfn; struct bm_block *bb; int bit; + rtree_pfn = memory_bm_rtree_next_pfn(bm); + bb = bm->cur.block; do { bit = bm->cur.bit; @@ -878,13 +912,77 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) } while (&bb->hook != &bm->blocks); memory_bm_position_reset(bm); + WARN_ON_ONCE(rtree_pfn != BM_END_OF_MAP); return BM_END_OF_MAP; Return_pfn: + WARN_ON_ONCE(bb->start_pfn + bit != rtree_pfn); bm->cur.bit = bit + 1; return bb->start_pfn + bit; } +/* + * rtree_next_node - Jumps to the next leave node + * + * Sets the position to the beginning of the next node in the + * memory bitmap. This is either the next node in the current + * zone's radix tree or the first node in the radix tree of the + * next zone. + * + * Returns true if there is a next node, false otherwise. 
+ */ +static bool rtree_next_node(struct memory_bitmap *bm) +{ + bm->cur.node = list_entry(bm->cur.node->list.next, + struct rtree_node, list); + if (&bm->cur.node->list != &bm->cur.zone->leaves) { + bm->cur.node_pfn += BM_BITS_PER_BLOCK; + bm->cur.node_bit = 0; + return true; + } + + /* No more nodes, goto next zone */ + bm->cur.zone = list_entry(bm->cur.zone->list.next, + struct mem_zone_bm_rtree, list); + if (&bm->cur.zone->list != &bm->zones) { + bm->cur.node = list_entry(bm->cur.zone->leaves.next, + struct rtree_node, list); + bm->cur.node_pfn = 0; + bm->cur.node_bit = 0; + return true; + } + + /* No more zones */ + return false; +} + +/* + * memory_bm_rtree_next_pfn - Find the next set bit + * + * Starting from the last returned position this function searches + * for the next set bit in the memory bitmap and returns its + * number. If no more bit is set BM_END_OF_MAP is returned. + */ +static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm) +{ + unsigned long bits, pfn, pages; + int bit; + + do { + pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn; + bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK); + bit = find_next_bit(bm->cur.node->data, bits, + bm->cur.node_bit); + if (bit < bits) { + pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit; + bm->cur.node_bit = bit + 1; + return pfn; + } + } while (rtree_next_node(bm)); + + return BM_END_OF_MAP; +} + /** * This structure represents a range of page frames the contents of which * should not be saved during the suspend. -- cgit v1.1
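The cur.* fields cache the leaf found by the last lookup, so callers that walk pfns in increasing order skip both the zone scan and the tree walk; the fast-path test in memory_rtree_find_bit() is plain mask arithmetic. A tiny standalone model of that check, with hypothetical zone and cache values (and the same 2^15-pfns-per-leaf geometry assumed in the earlier sketch):

#include <stdio.h>

#define BM_BLOCK_SHIFT	15	/* assumes 4 KiB pages, as before */
#define BM_BLOCK_MASK	((1UL << BM_BLOCK_SHIFT) - 1)

/* Minimal stand-in for the cached-leaf test in memory_rtree_find_bit():
 * masking off BM_BLOCK_MASK yields the first zone-relative pfn covered by
 * a leaf, which is exactly what cur.node_pfn records. */
static int hits_cached_leaf(unsigned long pfn, unsigned long start_pfn,
			    unsigned long cur_node_pfn)
{
	return ((pfn - start_pfn) & ~BM_BLOCK_MASK) == cur_node_pfn;
}

int main(void)
{
	unsigned long start = 0x1000;	/* hypothetical zone->start_pfn */
	unsigned long cached = 0x8000;	/* cur.node_pfn: leaf covers relative
					   pfns 0x8000..0xffff */

	printf("%d\n", hits_cached_leaf(start + 0x8000, start, cached));  /* 1 */
	printf("%d\n", hits_cached_leaf(start + 0xffff, start, cached));  /* 1 */
	printf("%d\n", hits_cached_leaf(start + 0x10000, start, cached)); /* 0: full walk */
	return 0;
}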
From 6efde38f07690652bf0d93f5e4f1a5f496574806 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:27:00 +0200 Subject: PM / Hibernate: Iterate over set bits instead of PFNs in swsusp_free() The existing implementation of swsusp_free iterates over all pfns in the system and checks every bit in the two memory bitmaps. This doesn't scale very well with large numbers of pfns, especially when the bitmaps are not populated very densely. Change the algorithm to iterate over the set bits in the bitmaps instead to make it scale better in large memory configurations. Also add a memory_bm_clear_current() helper function that clears the bit for the last position returned from the memory bitmap. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 53 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 802f241..5b71caf 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -848,6 +848,17 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) clear_bit(bit, addr); } +static void memory_bm_clear_current(struct memory_bitmap *bm) +{ + int bit; + + bit = max(bm->cur.node_bit - 1, 0); + clear_bit(bit, bm->cur.node->data); + + bit = max(bm->cur.bit - 1, 0); + clear_bit(bit, bm->cur.block->data); +} + static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; @@ -1491,23 +1502,35 @@ static struct memory_bitmap copy_bm; void swsusp_free(void) { - struct zone *zone; - unsigned long pfn, max_zone_pfn; + unsigned long fb_pfn, fr_pfn; - for_each_populated_zone(zone) { - max_zone_pfn = zone_end_pfn(zone); - for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - - if (swsusp_page_is_forbidden(page) && - swsusp_page_is_free(page)) { - swsusp_unset_page_forbidden(page); - swsusp_unset_page_free(page); - __free_page(page); - } - } + memory_bm_position_reset(forbidden_pages_map); + memory_bm_position_reset(free_pages_map); + +loop: + fr_pfn = memory_bm_next_pfn(free_pages_map); + fb_pfn = memory_bm_next_pfn(forbidden_pages_map); + + /* + * Find the next bit set in both bitmaps. This is guaranteed to + * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. + */ + do { + if (fb_pfn < fr_pfn) + fb_pfn = memory_bm_next_pfn(forbidden_pages_map); + if (fr_pfn < fb_pfn) + fr_pfn = memory_bm_next_pfn(free_pages_map); + } while (fb_pfn != fr_pfn); + + if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) { + struct page *page = pfn_to_page(fr_pfn); + + memory_bm_clear_current(forbidden_pages_map); + memory_bm_clear_current(free_pages_map); + __free_page(page); + goto loop; } + nr_copy_pages = 0; nr_meta_pages = 0; restore_pblist = NULL; -- cgit v1.1
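The new swsusp_free() loop is a classic merge of two sorted streams: keep advancing whichever iterator is behind until both report the same pfn, and every pfn reported by both maps is a page to free. A standalone model of the same control flow, with hypothetical pfn arrays standing in for the bitmap iterators:

#include <stdio.h>

#define END (~0UL)	/* stands in for BM_END_OF_MAP */

/* Hypothetical sorted pfn streams, each terminated by END. */
static unsigned long next_pfn(const unsigned long *s, int *pos)
{
	return s[(*pos)++];
}

int main(void)
{
	const unsigned long forbidden[] = { 3, 5, 9, 12, END };
	const unsigned long free_map[]  = { 5, 7, 12, 20, END };
	int fb_pos = 0, fr_pos = 0;
	unsigned long fb_pfn, fr_pfn;

loop:
	fr_pfn = next_pfn(free_map, &fr_pos);
	fb_pfn = next_pfn(forbidden, &fb_pos);

	/* Same merge as the kernel loop: it terminates at the latest when
	 * both streams hit the END sentinel. */
	do {
		if (fb_pfn < fr_pfn)
			fb_pfn = next_pfn(forbidden, &fb_pos);
		if (fr_pfn < fb_pfn)
			fr_pfn = next_pfn(free_map, &fr_pos);
	} while (fb_pfn != fr_pfn);

	if (fr_pfn != END) {
		printf("free page at pfn %lu\n", fr_pfn);  /* prints 5, then 12 */
		goto loop;
	}
	return 0;
}

Only the intersection of the two streams ({5, 12} here) is visited, which is what makes the rewrite cheap on sparsely populated bitmaps.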
From 9047eb629e5cd25ae3834d8c62ae02eb8c32bc17 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:27:01 +0200 Subject: PM / Hibernate: Remove the old memory-bitmap implementation The radix tree implementation has now been proven to work the same as the old implementation, so the old implementation can be removed to finish the switch to the radix tree for the memory bitmaps. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 223 +++------------------------------------------ 1 file changed, 21 insertions(+), 202 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5b71caf..ab1998a 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -267,18 +267,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) #define BM_BLOCK_SHIFT (PAGE_SHIFT + 3) #define BM_BLOCK_MASK ((1UL << BM_BLOCK_SHIFT) - 1) -struct bm_block { - struct list_head hook; /* hook into a list of bitmap blocks */ - unsigned long start_pfn; /* pfn represented by the first bit */ - unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ - unsigned long *data; /* bitmap representing pages */ -}; - -static inline unsigned long bm_block_bits(struct bm_block *bb) -{ - return bb->end_pfn - bb->start_pfn; -} - /* * struct rtree_node is a wrapper struct to link the nodes * of the rtree together for easy linear iteration over @@ -307,9 +295,6 @@ struct mem_zone_bm_rtree { /* strcut bm_position is used for browsing memory bitmaps */ struct bm_position { - struct bm_block *block; - int bit; - struct mem_zone_bm_rtree *zone; struct rtree_node *node; unsigned long node_pfn; @@ -318,7 +303,6 @@ struct bm_position { struct memory_bitmap { struct list_head zones; - struct list_head blocks; /* list of bitmap blocks */ struct linked_page *p_list; /* list of pages used to store zone * bitmap objects and bitmap block * objects @@ -490,9 +474,6 @@ static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, static void memory_bm_position_reset(struct memory_bitmap *bm) { - bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); - bm->cur.bit = 0; - bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree, list); bm->cur.node = list_entry(bm->cur.zone->leaves.next, @@ -503,30 +484,6 @@ static void memory_bm_position_reset(struct memory_bitmap *bm) static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); -/** - * create_bm_block_list - create a list of block bitmap objects - * @pages - number of pages to track - * @list - list to put the allocated blocks into - * @ca - chain allocator to be used for allocating memory - */ -static int create_bm_block_list(unsigned long pages, - struct list_head *list, - struct chain_allocator *ca) -{ - unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); - - while (nr_blocks-- > 0) { - struct bm_block *bb; - - bb = chain_alloc(ca, sizeof(struct bm_block)); - if (!bb) - return -ENOMEM; - list_add(&bb->hook, list); - } - - return 0; -} - struct mem_extent { struct list_head hook; unsigned long start; @@ -618,7 +575,6 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) int error; chain_init(&ca, gfp_mask, safe_needed); - INIT_LIST_HEAD(&bm->blocks); INIT_LIST_HEAD(&bm->zones); error = create_mem_extents(&mem_extents, gfp_mask); @@ -627,38 +583,13 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) list_for_each_entry(ext, &mem_extents, hook) { struct mem_zone_bm_rtree *zone; - struct bm_block *bb; - unsigned long pfn = ext->start; - unsigned long pages = ext->end - ext->start; - - bb = list_entry(bm->blocks.prev, struct bm_block, hook); - - error = create_bm_block_list(pages, bm->blocks.prev, &ca); - if (error) - goto Error; - - list_for_each_entry_continue(bb, &bm->blocks, hook) { - bb->data = get_image_page(gfp_mask, safe_needed); - if (!bb->data) { - error = -ENOMEM; - goto Error; - } - -
bb->start_pfn = pfn; - if (pages >= BM_BITS_PER_BLOCK) { - pfn += BM_BITS_PER_BLOCK; - pages -= BM_BITS_PER_BLOCK; - } else { - /* This is executed only once in the loop */ - pfn += pages; - } - bb->end_pfn = pfn; - } zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca, ext->start, ext->end); - if (!zone) + if (!zone) { + error = -ENOMEM; goto Error; + } list_add_tail(&zone->list, &bm->zones); } @@ -680,11 +611,6 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) { struct mem_zone_bm_rtree *zone; - struct bm_block *bb; - - list_for_each_entry(bb, &bm->blocks, hook) - if (bb->data) - free_image_page(bb->data, clear_nosave_free); list_for_each_entry(zone, &bm->zones, list) free_zone_bm_rtree(zone, clear_nosave_free); @@ -692,55 +618,20 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) free_list_of_pages(bm->p_list, clear_nosave_free); INIT_LIST_HEAD(&bm->zones); - INIT_LIST_HEAD(&bm->blocks); } /** - * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds - * to given pfn. The cur_zone_bm member of @bm and the cur_block member - * of @bm->cur_zone_bm are updated. - */ -static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, - void **addr, unsigned int *bit_nr) -{ - struct bm_block *bb; - - /* - * Check if the pfn corresponds to the current bitmap block and find - * the block where it fits if this is not the case. - */ - bb = bm->cur.block; - if (pfn < bb->start_pfn) - list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) - if (pfn >= bb->start_pfn) - break; - - if (pfn >= bb->end_pfn) - list_for_each_entry_continue(bb, &bm->blocks, hook) - if (pfn >= bb->start_pfn && pfn < bb->end_pfn) - break; - - if (&bb->hook == &bm->blocks) - return -EFAULT; - - /* The block has been found */ - bm->cur.block = bb; - pfn -= bb->start_pfn; - bm->cur.bit = pfn + 1; - *bit_nr = pfn; - *addr = bb->data; - return 0; -} - -/* - * memory_rtree_find_bit - Find the bit for pfn in the memory - * bitmap + * memory_bm_find_bit - Find the bit for pfn in the memory + * bitmap * - * Walks the radix tree to find the page which contains the bit for + * Find the bit in the bitmap @bm that corresponds to given pfn. + * The cur.zone, cur.block and cur.node_pfn member of @bm are + * updated. + * It walks the radix tree to find the page which contains the bit for * pfn and returns the bit position in **addr and *bit_nr. 
*/ -static int memory_rtree_find_bit(struct memory_bitmap *bm, unsigned long pfn, - void **addr, unsigned int *bit_nr) +static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, + void **addr, unsigned int *bit_nr) { struct mem_zone_bm_rtree *curr, *zone; struct rtree_node *node; @@ -808,10 +699,6 @@ static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); set_bit(bit, addr); - - error = memory_rtree_find_bit(bm, pfn, &addr, &bit); - BUG_ON(error); - set_bit(bit, addr); } static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) @@ -823,12 +710,6 @@ static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); if (!error) set_bit(bit, addr); - else - return error; - - error = memory_rtree_find_bit(bm, pfn, &addr, &bit); - if (!error) - set_bit(bit, addr); return error; } @@ -842,10 +723,6 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); clear_bit(bit, addr); - - error = memory_rtree_find_bit(bm, pfn, &addr, &bit); - BUG_ON(error); - clear_bit(bit, addr); } static void memory_bm_clear_current(struct memory_bitmap *bm) @@ -854,82 +731,25 @@ static void memory_bm_clear_current(struct memory_bitmap *bm) bit = max(bm->cur.node_bit - 1, 0); clear_bit(bit, bm->cur.node->data); - - bit = max(bm->cur.bit - 1, 0); - clear_bit(bit, bm->cur.block->data); } static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; - int error, error2; - int v; + int error; error = memory_bm_find_bit(bm, pfn, &addr, &bit); BUG_ON(error); - v = test_bit(bit, addr); - - error2 = memory_rtree_find_bit(bm, pfn, &addr, &bit); - BUG_ON(error2); - - WARN_ON_ONCE(v != test_bit(bit, addr)); - - return v; + return test_bit(bit, addr); } static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) { void *addr; unsigned int bit; - int present; - - present = !memory_bm_find_bit(bm, pfn, &addr, &bit); - - WARN_ON_ONCE(present != !memory_rtree_find_bit(bm, pfn, &addr, &bit)); - return present; -} - -/** - * memory_bm_next_pfn - find the pfn that corresponds to the next set bit - * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is - * returned. - * - * It is required to run memory_bm_position_reset() before the first call to - * this function. 
- */ - -static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm); - -static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) -{ - unsigned long rtree_pfn; - struct bm_block *bb; - int bit; - - rtree_pfn = memory_bm_rtree_next_pfn(bm); - - bb = bm->cur.block; - do { - bit = bm->cur.bit; - bit = find_next_bit(bb->data, bm_block_bits(bb), bit); - if (bit < bm_block_bits(bb)) - goto Return_pfn; - - bb = list_entry(bb->hook.next, struct bm_block, hook); - bm->cur.block = bb; - bm->cur.bit = 0; - } while (&bb->hook != &bm->blocks); - - memory_bm_position_reset(bm); - WARN_ON_ONCE(rtree_pfn != BM_END_OF_MAP); - return BM_END_OF_MAP; - - Return_pfn: - WARN_ON_ONCE(bb->start_pfn + bit != rtree_pfn); - bm->cur.bit = bit + 1; - return bb->start_pfn + bit; + return !memory_bm_find_bit(bm, pfn, &addr, &bit); } /* @@ -967,14 +787,17 @@ static bool rtree_next_node(struct memory_bitmap *bm) return false; } -/* - * memory_bm_rtree_next_pfn - Find the next set bit +/** + * memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm * * Starting from the last returned position this function searches * for the next set bit in the memory bitmap and returns its * number. If no more bit is set BM_END_OF_MAP is returned. + * + * It is required to run memory_bm_position_reset() before the + * first call to this function. */ -static unsigned long memory_bm_rtree_next_pfn(struct memory_bitmap *bm) +static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) { unsigned long bits, pfn, pages; int bit; @@ -1216,11 +1039,7 @@ void free_basic_memory_bitmaps(void) unsigned int snapshot_additional_pages(struct zone *zone) { unsigned int rtree, nodes; - unsigned int res; - res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); - res += DIV_ROUND_UP(res * sizeof(struct bm_block), - LINKED_PAGE_DATA_SIZE); rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node), LINKED_PAGE_DATA_SIZE); @@ -1229,7 +1048,7 @@ unsigned int snapshot_additional_pages(struct zone *zone) rtree += nodes; } - return 2 * (res + rtree); + return 2 * rtree; } #ifdef CONFIG_HIGHMEM -- cgit v1.1 From 0f7d83e85dbd5bb8032ebed7713edf59670fb074 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 21 Jul 2014 12:27:02 +0200 Subject: PM / Hibernate: Touch Soft Lockup Watchdog in rtree_next_node When a memory bitmap is fully populated on a large memory machine (several TB of RAM) it can take more than a minute to walk through all bits. This causes the soft lockup detector on these machines to report warnings. Avoid this by touching the soft lockup watchdog in the memory bitmap walking code. Signed-off-by: Joerg Roedel Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ab1998a..4fc5c32 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -769,6 +769,7 @@ static bool rtree_next_node(struct memory_bitmap *bm) if (&bm->cur.node->list != &bm->cur.zone->leaves) { bm->cur.node_pfn += BM_BITS_PER_BLOCK; bm->cur.node_bit = 0; + touch_softlockup_watchdog(); return true; } -- cgit v1.1
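touch_softlockup_watchdog() resets the current CPU's softlockup timestamp, so a long but legitimate kernel loop is not flagged as a lockup; note the patch touches it once per leaf node rather than once per bit, which keeps the overhead negligible. The general pattern, as a minimal sketch (walk_many_items() is a hypothetical loop; the helper itself comes from <linux/nmi.h>):

#include <linux/nmi.h>

/* Hypothetical long, non-sleeping kernel loop: touching the watchdog on
 * each pass resets this CPU's softlockup timestamp so the detector stays
 * quiet while real work is being done. */
static void walk_many_items(unsigned long nr)
{
	unsigned long i;

	for (i = 0; i < nr; i++) {
		/* ... process item i ... */
		touch_softlockup_watchdog();
	}
}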