From f0263d2d222e9e25f2587e51a9dc58c6fb2a9352 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 19 Oct 2012 14:03:31 +0100 Subject: mm: highmem: export kmap_to_page for modules Some virtio device drivers (9p) need to translate high virtual addresses to physical addresses, which are inserted into the virtqueue for processing by userspace. This patch exports the kmap_to_page symbol, so that the affected drivers can be compiled as modules. Cc: stable@kernel.org Signed-off-by: Will Deacon Signed-off-by: Rusty Russell --- mm/highmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'mm') diff --git a/mm/highmem.c b/mm/highmem.c index d517cd1..2a07f97 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -105,6 +105,7 @@ struct page *kmap_to_page(void *vaddr) return virt_to_page(addr); } +EXPORT_SYMBOL(kmap_to_page); static void flush_all_zero_pkmaps(void) { -- cgit v1.1 From cda73a10eb3f493871ed39f468db50a65ebeddce Mon Sep 17 00:00:00 2001 From: Zlatko Calusic Date: Thu, 20 Dec 2012 00:25:13 +0100 Subject: mm: do not sleep in balance_pgdat if there's no i/o congestion On a 4GB RAM machine, where Normal zone is much smaller than DMA32 zone, the Normal zone gets fragmented over time. This requires relatively more pressure in balance_pgdat to get the zone above the required watermark. Unfortunately, the congestion_wait() call in there slows it down for a completely wrong reason, expecting that there's a lot of writeback/swapout, even when there's none (much more common). After a few days, when fragmentation progresses, this flawed logic translates to very high CPU iowait times, even though there's no I/O congestion at all. If THP is enabled, the problem occurs sooner, but I was able to see it even on !THP kernels, just by giving it a bit more time to occur. The proper way to deal with this is to not wait, unless there's congestion. Thanks to Mel Gorman, we already have the function that perfectly fits the job. 
The patch was tested on a machine which nicely revealed the problem after only 1 day of uptime, and it's been working great. Signed-off-by: Zlatko Calusic Acked-by: Mel Gorman Signed-off-by: Linus Torvalds --- mm/vmscan.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'mm') diff --git a/mm/vmscan.c b/mm/vmscan.c index 828530e2..adc7e90 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2570,7 +2570,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, static unsigned long balance_pgdat(pg_data_t *pgdat, int order, int *classzone_idx) { - int all_zones_ok; + struct zone *unbalanced_zone; unsigned long balanced; int i; int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ @@ -2604,7 +2604,7 @@ loop_again: unsigned long lru_pages = 0; int has_under_min_watermark_zone = 0; - all_zones_ok = 1; + unbalanced_zone = NULL; balanced = 0; /* @@ -2743,7 +2743,7 @@ loop_again: } if (!zone_balanced(zone, testorder, 0, end_zone)) { - all_zones_ok = 0; + unbalanced_zone = zone; /* * We are still under min water mark. This * means that we have a GFP_ATOMIC allocation @@ -2776,7 +2776,7 @@ loop_again: pfmemalloc_watermark_ok(pgdat)) wake_up(&pgdat->pfmemalloc_wait); - if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx))) + if (!unbalanced_zone || (order && pgdat_balanced(pgdat, balanced, *classzone_idx))) break; /* kswapd: all done */ /* * OK, kswapd is getting into trouble. 
Take a nap, then take @@ -2786,7 +2786,7 @@ loop_again: if (has_under_min_watermark_zone) count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); else - congestion_wait(BLK_RW_ASYNC, HZ/10); + wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10); } /* @@ -2805,7 +2805,7 @@ out: * high-order: Balanced zones must make up at least 25% of the node * for the node to be balanced */ - if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, *classzone_idx)))) { + if (unbalanced_zone && (!order || !pgdat_balanced(pgdat, balanced, *classzone_idx))) { cond_resched(); try_to_freeze(); -- cgit v1.1 From b6b19f25f69149c0912788fb81466dd2310bb095 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 19 Dec 2012 17:44:29 -0800 Subject: ksm: make rmap walks more scalable The rmap walks in ksm.c are like those in rmap.c: they can safely be done with anon_vma_lock_read(). Signed-off-by: Hugh Dickins Acked-by: Mel Gorman Signed-off-by: Linus Torvalds --- mm/ksm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'mm') diff --git a/mm/ksm.c b/mm/ksm.c index 82dfb4b..5157385 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1624,7 +1624,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock_write(anon_vma); + anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1648,7 +1648,7 @@ again: if (!search_new_forks || !mapcount) break; } - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); if (!mapcount) goto out; } @@ -1678,7 +1678,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock_write(anon_vma); + anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1697,11 +1697,11 @@ again: ret = try_to_unmap_one(page, vma, rmap_item->address, flags); if (ret != SWAP_AGAIN || !page_mapped(page)) { - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); goto out; 
} } - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); } if (!search_new_forks++) goto again; @@ -1731,7 +1731,7 @@ again: struct anon_vma_chain *vmac; struct vm_area_struct *vma; - anon_vma_lock_write(anon_vma); + anon_vma_lock_read(anon_vma); anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { vma = vmac->vma; @@ -1749,11 +1749,11 @@ again: ret = rmap_one(page, vma, rmap_item->address, arg); if (ret != SWAP_AGAIN) { - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); goto out; } } - anon_vma_unlock(anon_vma); + anon_vma_unlock_read(anon_vma); } if (!search_new_forks++) goto again; -- cgit v1.1