From 3bd9646334d8a3e7f91a94b9c217657f726de7ee Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Jul 2016 15:21:11 -0700 Subject: fbmon: remove unused function argument When building with "make W=1", we get a warning about an empty stub function that does nothing but reassign its one of its arguments: drivers/video/fbdev/core/fbmon.c: In function 'fb_edid_to_monspecs': drivers/video/fbdev/core/fbmon.c:1497:67: error: parameter 'specs' set but not used [-Werror=unused-but-set-parameter] We can simply make that function completely empty to avoid the warning. This prevents a warning which everyone will see after "CFLAGS: add -Wunused-but-set-parameter" is merged. Link: http://lkml.kernel.org/r/20160715203229.1771162-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Cc: Jean-Christophe Plagniol-Villard Cc: Tomi Valkeinen Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/fbdev/core/fbmon.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index 47c3191..62c0cf7 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -1496,7 +1496,6 @@ int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var) } void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs) { - specs = NULL; } void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) { -- cgit v1.1 From a371d9f1cc49f58c8be3d28c88aceaef86cb59d0 Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Tue, 26 Jul 2016 15:22:27 -0700 Subject: memory-hotplug: use zone_can_shift() for sysfs valid_zones attribute Since zone_can_shift() is being used to validate the target zone during onlining, it should also be used to determine the content of valid_zones. Link: http://lkml.kernel.org/r/1462816419-4479-4-git-send-email-arbab@linux.vnet.ibm.com Signed-off-by: Reza Arbab Reviewd-by: Yasuaki Ishimatsu Cc: Greg Kroah-Hartman Cc: Daniel Kiper Cc: Dan Williams Cc: Vlastimil Babka Cc: Tang Chen Cc: Joonsoo Kim Cc: David Vrabel Cc: Vitaly Kuznetsov Cc: David Rientjes Cc: Andrew Banman Cc: Chen Yucong Cc: Yasunori Goto Cc: Zhang Zhen Cc: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f46dba8..dc75de9 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -391,6 +391,7 @@ static ssize_t show_valid_zones(struct device *dev, unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; struct page *first_page; struct zone *zone; + int zone_shift = 0; start_pfn = section_nr_to_pfn(mem->start_section_nr); end_pfn = start_pfn + nr_pages; @@ -402,21 +403,26 @@ static ssize_t show_valid_zones(struct device *dev, zone = page_zone(first_page); - if (zone_idx(zone) == ZONE_MOVABLE - 1) { - /*The mem block is the last memoryblock of this zone.*/ - if (end_pfn == zone_end_pfn(zone)) - return sprintf(buf, "%s %s\n", - zone->name, (zone + 1)->name); + /* MMOP_ONLINE_KEEP */ + sprintf(buf, "%s", zone->name); + + /* MMOP_ONLINE_KERNEL */ + zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_NORMAL); + if (zone_shift) { + strcat(buf, " "); + strcat(buf, (zone + zone_shift)->name); } - if (zone_idx(zone) == ZONE_MOVABLE) { - /*The mem block is the first memoryblock of ZONE_MOVABLE.*/ - if (start_pfn == zone->zone_start_pfn) - return sprintf(buf, "%s %s\n", - zone->name, (zone - 1)->name); + /* MMOP_ONLINE_MOVABLE */ + zone_shift = zone_can_shift(start_pfn, nr_pages, ZONE_MOVABLE); + if (zone_shift) { + strcat(buf, " "); + strcat(buf, (zone + zone_shift)->name); } - return sprintf(buf, "%s\n", zone->name); + strcat(buf, "\n"); + + return strlen(buf); } static DEVICE_ATTR(valid_zones, 0444, show_valid_zones, NULL); #endif -- cgit v1.1 From 2a966b77ae3ede207e787e7538b87d1011c4364e Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 26 Jul 2016 15:22:33 -0700 Subject: mm: oom: add memcg to oom_control It's a part of oom context just like allocation order and nodemask, so let's move it to oom_control instead of passing it in the argument list. Link: http://lkml.kernel.org/r/40e03fd7aaf1f55c75d787128d6d17c5a71226c2.1464358556.git.vdavydov@virtuozzo.com Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Tetsuo Handa Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/sysrq.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index e513940..52bbd27 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -363,6 +363,7 @@ static void moom_callback(struct work_struct *ignored) struct oom_control oc = { .zonelist = node_zonelist(first_memory_node, gfp_mask), .nodemask = NULL, + .memcg = NULL, .gfp_mask = gfp_mask, .order = -1, }; -- cgit v1.1 From 2aea8493d326bdf15446768333e1d2c91b040b5c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 Jul 2016 15:22:42 -0700 Subject: zram: rename zstrm find-release functions This has started as a 'add zlib support' work, but after some thinking I saw no blockers for a bigger change -- a switch to crypto API. We don't have an idle zstreams list anymore and our write path now works absolutely differently, preventing preemption during compression. This removes possibilities of read paths preempting writes at wrong places and opens the door for a move from custom LZO/LZ4 compression backends implementation to a more generic one, using crypto compress API. This patch set also eliminates the need of a new context-less crypto API interface, which was quite hard to sell, so we can move along faster. benchmarks: (x86_64, 4GB, zram-perf script) perf reported run-time fio (max jobs=3). I performed fio test with the increasing number of parallel jobs (max to 3) on a 3G zram device, using `static' data and the following crypto comp algorithms: 842, deflate, lz4, lz4hc, lzo the output was: - test running time (which can tell us what algorithms performs faster) and - zram mm_stat (which tells the compressed memory size, max used memory, etc). It's just for information. for example, LZ4HC has twice the running time of LZO, but the compressed memory size is: 23592960 vs 34603008 bytes. test-fio-zram-842 197.907655282 seconds time elapsed 201.623142884 seconds time elapsed 226.854291345 seconds time elapsed test-fio-zram-DEFLATE 253.259516155 seconds time elapsed 258.148563401 seconds time elapsed 290.251909365 seconds time elapsed test-fio-zram-LZ4 27.022598717 seconds time elapsed 29.580522717 seconds time elapsed 33.293463430 seconds time elapsed test-fio-zram-LZ4HC 56.393954615 seconds time elapsed 74.904659747 seconds time elapsed 101.940998564 seconds time elapsed test-fio-zram-LZO 28.155948075 seconds time elapsed 30.390036330 seconds time elapsed 34.455773159 seconds time elapsed zram mm_stat-s (max fio jobs=3) test-fio-zram-842 mm_stat (jobs1): 3221225472 673185792 690266112 0 690266112 0 0 mm_stat (jobs2): 3221225472 673185792 690266112 0 690266112 0 0 mm_stat (jobs3): 3221225472 673185792 690266112 0 690266112 0 0 test-fio-zram-DEFLATE mm_stat (jobs1): 3221225472 24379392 37761024 0 37761024 0 0 mm_stat (jobs2): 3221225472 24379392 37761024 0 37761024 0 0 mm_stat (jobs3): 3221225472 24379392 37761024 0 37761024 0 0 test-fio-zram-LZ4 mm_stat (jobs1): 3221225472 23592960 37761024 0 37761024 0 0 mm_stat (jobs2): 3221225472 23592960 37761024 0 37761024 0 0 mm_stat (jobs3): 3221225472 23592960 37761024 0 37761024 0 0 test-fio-zram-LZ4HC mm_stat (jobs1): 3221225472 23592960 37761024 0 37761024 0 0 mm_stat (jobs2): 3221225472 23592960 37761024 0 37761024 0 0 mm_stat (jobs3): 3221225472 23592960 37761024 0 37761024 0 0 test-fio-zram-LZO mm_stat (jobs1): 3221225472 34603008 50335744 0 50335744 0 0 mm_stat (jobs2): 3221225472 34603008 50335744 0 50335744 0 0 mm_stat (jobs3): 3221225472 34603008 50335744 0 50339840 0 0 This patch (of 8): We don't perform any zstream idle list lookup anymore, so zcomp_strm_find()/zcomp_strm_release() names are not representative. Rename to zcomp_stream_get()/zcomp_stream_put(). Link: http://lkml.kernel.org/r/20160531122017.2878-2-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zcomp.c | 4 ++-- drivers/block/zram/zcomp.h | 4 ++-- drivers/block/zram/zram_drv.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index b51a816..400f826 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -95,12 +95,12 @@ bool zcomp_available_algorithm(const char *comp) return find_backend(comp) != NULL; } -struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) +struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { return *get_cpu_ptr(comp->stream); } -void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) +void zcomp_stream_put(struct zcomp *comp) { put_cpu_ptr(comp->stream); } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index ffd88cb..944b8e6 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -48,8 +48,8 @@ bool zcomp_available_algorithm(const char *comp); struct zcomp *zcomp_create(const char *comp); void zcomp_destroy(struct zcomp *comp); -struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); -void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm); +struct zcomp_strm *zcomp_stream_get(struct zcomp *comp); +void zcomp_stream_put(struct zcomp *comp); int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, const unsigned char *src, size_t *dst_len); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8fcad8b..9361a5d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -695,7 +695,7 @@ compress_again: goto out; } - zstrm = zcomp_strm_find(zram->comp); + zstrm = zcomp_stream_get(zram->comp); ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); @@ -734,7 +734,7 @@ compress_again: __GFP_NOWARN | __GFP_HIGHMEM); if (!handle) { - zcomp_strm_release(zram->comp, zstrm); + zcomp_stream_put(zram->comp); zstrm = NULL; atomic64_inc(&zram->stats.writestall); @@ -769,7 +769,7 @@ compress_again: memcpy(cmem, src, clen); } - zcomp_strm_release(zram->comp, zstrm); + zcomp_stream_put(zram->comp); zstrm = NULL; zs_unmap_object(meta->mem_pool, handle); @@ -789,7 +789,7 @@ compress_again: atomic64_inc(&zram->stats.pages_stored); out: if (zstrm) - zcomp_strm_release(zram->comp, zstrm); + zcomp_stream_put(zram->comp); if (is_partial_io(bvec)) kfree(uncmem); return ret; -- cgit v1.1 From ebaf9ab56d9d5f350969bd1ea8f47234623c9684 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 Jul 2016 15:22:45 -0700 Subject: zram: switch to crypto compress API We don't have an idle zstreams list anymore and our write path now works absolutely differently, preventing preemption during compression. This removes possibilities of read paths preempting writes at wrong places (which could badly affect the performance of both paths) and at the same time opens the door for a move from custom LZO/LZ4 compression backends implementation to a more generic one, using crypto compress API. Joonsoo Kim [1] attempted to do this a while ago, but faced with the need of introducing a new crypto API interface. The root cause was the fact that crypto API compression algorithms require a compression stream structure (in zram terminology) for both compression and decompression ops, while in reality only several of compression algorithms really need it. This resulted in a concept of context-less crypto API compression backends [2]. Both write and read paths, though, would have been executed with the preemption enabled, which in the worst case could have resulted in a decreased worst-case performance, e.g. consider the following case: CPU0 zram_write() spin_lock() take the last idle stream spin_unlock() << preempted >> zram_read() spin_lock() no idle streams spin_unlock() schedule() resuming zram_write compression() but it took me some time to realize that, and it took even longer to evolve zram and to make it ready for crypto API. The key turned out to be -- drop the idle streams list entirely. Without the idle streams list we are free to use compression algorithms that require compression stream for decompression (read), because streams are now placed in per-cpu data and each write path has to disable preemption for compression op, almost completely eliminating the aforementioned case (technically, we still have a small chance, because write path has a fast and a slow paths and the slow path is executed with the preemption enabled; but the frequency of failed fast path is too low). TEST ==== - 4 CPUs, x86_64 system - 3G zram, lzo - fio tests: read, randread, write, randwrite, rw, randrw test script [3] command: ZRAM_SIZE=3G LOG_SUFFIX=XXXX FIO_LOOPS=5 ./zram-fio-test.sh BASE PATCHED jobs1 READ: 2527.2MB/s 2482.7MB/s READ: 2102.7MB/s 2045.0MB/s WRITE: 1284.3MB/s 1324.3MB/s WRITE: 1080.7MB/s 1101.9MB/s READ: 430125KB/s 437498KB/s WRITE: 430538KB/s 437919KB/s READ: 399593KB/s 403987KB/s WRITE: 399910KB/s 404308KB/s jobs2 READ: 8133.5MB/s 7854.8MB/s READ: 7086.6MB/s 6912.8MB/s WRITE: 3177.2MB/s 3298.3MB/s WRITE: 2810.2MB/s 2871.4MB/s READ: 1017.6MB/s 1023.4MB/s WRITE: 1018.2MB/s 1023.1MB/s READ: 977836KB/s 984205KB/s WRITE: 979435KB/s 985814KB/s jobs3 READ: 13557MB/s 13391MB/s READ: 11876MB/s 11752MB/s WRITE: 4641.5MB/s 4682.1MB/s WRITE: 4164.9MB/s 4179.3MB/s READ: 1453.8MB/s 1455.1MB/s WRITE: 1455.1MB/s 1458.2MB/s READ: 1387.7MB/s 1395.7MB/s WRITE: 1386.1MB/s 1394.9MB/s jobs4 READ: 20271MB/s 20078MB/s READ: 18033MB/s 17928MB/s WRITE: 6176.8MB/s 6180.5MB/s WRITE: 5686.3MB/s 5705.3MB/s READ: 2009.4MB/s 2006.7MB/s WRITE: 2007.5MB/s 2004.9MB/s READ: 1929.7MB/s 1935.6MB/s WRITE: 1926.8MB/s 1932.6MB/s jobs5 READ: 18823MB/s 19024MB/s READ: 18968MB/s 19071MB/s WRITE: 6191.6MB/s 6372.1MB/s WRITE: 5818.7MB/s 5787.1MB/s READ: 2011.7MB/s 1981.3MB/s WRITE: 2011.4MB/s 1980.1MB/s READ: 1949.3MB/s 1935.7MB/s WRITE: 1940.4MB/s 1926.1MB/s jobs6 READ: 21870MB/s 21715MB/s READ: 19957MB/s 19879MB/s WRITE: 6528.4MB/s 6537.6MB/s WRITE: 6098.9MB/s 6073.6MB/s READ: 2048.6MB/s 2049.9MB/s WRITE: 2041.7MB/s 2042.9MB/s READ: 2013.4MB/s 1990.4MB/s WRITE: 2009.4MB/s 1986.5MB/s jobs7 READ: 21359MB/s 21124MB/s READ: 19746MB/s 19293MB/s WRITE: 6660.4MB/s 6518.8MB/s WRITE: 6211.6MB/s 6193.1MB/s READ: 2089.7MB/s 2080.6MB/s WRITE: 2085.8MB/s 2076.5MB/s READ: 2041.2MB/s 2052.5MB/s WRITE: 2037.5MB/s 2048.8MB/s jobs8 READ: 20477MB/s 19974MB/s READ: 18922MB/s 18576MB/s WRITE: 6851.9MB/s 6788.3MB/s WRITE: 6407.7MB/s 6347.5MB/s READ: 2134.8MB/s 2136.1MB/s WRITE: 2132.8MB/s 2134.4MB/s READ: 2074.2MB/s 2069.6MB/s WRITE: 2087.3MB/s 2082.4MB/s jobs9 READ: 19797MB/s 19994MB/s READ: 18806MB/s 18581MB/s WRITE: 6878.7MB/s 6822.7MB/s WRITE: 6456.8MB/s 6447.2MB/s READ: 2141.1MB/s 2154.7MB/s WRITE: 2144.4MB/s 2157.3MB/s READ: 2084.1MB/s 2085.1MB/s WRITE: 2091.5MB/s 2092.5MB/s jobs10 READ: 19794MB/s 19784MB/s READ: 18794MB/s 18745MB/s WRITE: 6984.4MB/s 6676.3MB/s WRITE: 6532.3MB/s 6342.7MB/s READ: 2150.6MB/s 2155.4MB/s WRITE: 2156.8MB/s 2161.5MB/s READ: 2106.4MB/s 2095.6MB/s WRITE: 2109.7MB/s 2098.4MB/s BASE PATCHED jobs1 perfstat stalled-cycles-frontend 102,480,595,419 ( 41.53%) 114,508,864,804 ( 46.92%) stalled-cycles-backend 51,941,417,832 ( 21.05%) 46,836,112,388 ( 19.19%) instructions 283,612,054,215 ( 1.15) 283,918,134,959 ( 1.16) branches 56,372,560,385 ( 724.923) 56,449,814,753 ( 733.766) branch-misses 374,826,000 ( 0.66%) 326,935,859 ( 0.58%) jobs2 perfstat stalled-cycles-frontend 155,142,745,777 ( 40.99%) 164,170,979,198 ( 43.82%) stalled-cycles-backend 70,813,866,387 ( 18.71%) 66,456,858,165 ( 17.74%) instructions 463,436,648,173 ( 1.22) 464,221,890,191 ( 1.24) branches 91,088,733,902 ( 760.088) 91,278,144,546 ( 769.133) branch-misses 504,460,363 ( 0.55%) 394,033,842 ( 0.43%) jobs3 perfstat stalled-cycles-frontend 201,300,397,212 ( 39.84%) 223,969,902,257 ( 44.44%) stalled-cycles-backend 87,712,593,974 ( 17.36%) 81,618,888,712 ( 16.19%) instructions 642,869,545,023 ( 1.27) 644,677,354,132 ( 1.28) branches 125,724,560,594 ( 690.682) 126,133,159,521 ( 694.542) branch-misses 527,941,798 ( 0.42%) 444,782,220 ( 0.35%) jobs4 perfstat stalled-cycles-frontend 246,701,197,429 ( 38.12%) 280,076,030,886 ( 43.29%) stalled-cycles-backend 119,050,341,112 ( 18.40%) 110,955,641,671 ( 17.15%) instructions 822,716,962,127 ( 1.27) 825,536,969,320 ( 1.28) branches 160,590,028,545 ( 688.614) 161,152,996,915 ( 691.068) branch-misses 650,295,287 ( 0.40%) 550,229,113 ( 0.34%) jobs5 perfstat stalled-cycles-frontend 298,958,462,516 ( 38.30%) 344,852,200,358 ( 44.16%) stalled-cycles-backend 137,558,742,122 ( 17.62%) 129,465,067,102 ( 16.58%) instructions 1,005,714,688,752 ( 1.29) 1,007,657,999,432 ( 1.29) branches 195,988,773,962 ( 697.730) 196,446,873,984 ( 700.319) branch-misses 695,818,940 ( 0.36%) 624,823,263 ( 0.32%) jobs6 perfstat stalled-cycles-frontend 334,497,602,856 ( 36.71%) 387,590,419,779 ( 42.38%) stalled-cycles-backend 163,539,365,335 ( 17.95%) 152,640,193,639 ( 16.69%) instructions 1,184,738,177,851 ( 1.30) 1,187,396,281,677 ( 1.30) branches 230,592,915,640 ( 702.902) 231,253,802,882 ( 702.356) branch-misses 747,934,786 ( 0.32%) 643,902,424 ( 0.28%) jobs7 perfstat stalled-cycles-frontend 396,724,684,187 ( 37.71%) 460,705,858,952 ( 43.84%) stalled-cycles-backend 188,096,616,496 ( 17.88%) 175,785,787,036 ( 16.73%) instructions 1,364,041,136,608 ( 1.30) 1,366,689,075,112 ( 1.30) branches 265,253,096,936 ( 700.078) 265,890,524,883 ( 702.839) branch-misses 784,991,589 ( 0.30%) 729,196,689 ( 0.27%) jobs8 perfstat stalled-cycles-frontend 440,248,299,870 ( 36.92%) 509,554,793,816 ( 42.46%) stalled-cycles-backend 222,575,930,616 ( 18.67%) 213,401,248,432 ( 17.78%) instructions 1,542,262,045,114 ( 1.29) 1,545,233,932,257 ( 1.29) branches 299,775,178,439 ( 697.666) 300,528,458,505 ( 694.769) branch-misses 847,496,084 ( 0.28%) 748,794,308 ( 0.25%) jobs9 perfstat stalled-cycles-frontend 506,269,882,480 ( 37.86%) 592,798,032,820 ( 44.43%) stalled-cycles-backend 253,192,498,861 ( 18.93%) 233,727,666,185 ( 17.52%) instructions 1,721,985,080,913 ( 1.29) 1,724,666,236,005 ( 1.29) branches 334,517,360,255 ( 694.134) 335,199,758,164 ( 697.131) branch-misses 873,496,730 ( 0.26%) 815,379,236 ( 0.24%) jobs10 perfstat stalled-cycles-frontend 549,063,363,749 ( 37.18%) 651,302,376,662 ( 43.61%) stalled-cycles-backend 281,680,986,810 ( 19.07%) 277,005,235,582 ( 18.55%) instructions 1,901,859,271,180 ( 1.29) 1,906,311,064,230 ( 1.28) branches 369,398,536,153 ( 694.004) 370,527,696,358 ( 688.409) branch-misses 967,929,335 ( 0.26%) 890,125,056 ( 0.24%) BASE PATCHED seconds elapsed 79.421641008 78.735285546 seconds elapsed 61.471246133 60.869085949 seconds elapsed 62.317058173 62.224188495 seconds elapsed 60.030739363 60.081102518 seconds elapsed 74.070398362 74.317582865 seconds elapsed 84.985953007 85.414364176 seconds elapsed 97.724553255 98.173311344 seconds elapsed 109.488066758 110.268399318 seconds elapsed 122.768189405 122.967164498 seconds elapsed 135.130035105 136.934770801 On my other system (8 x86_64 CPUs, short version of test results): BASE PATCHED seconds elapsed 19.518065994 19.806320662 seconds elapsed 15.172772749 15.594718291 seconds elapsed 13.820925970 13.821708564 seconds elapsed 13.293097816 14.585206405 seconds elapsed 16.207284118 16.064431606 seconds elapsed 17.958376158 17.771825767 seconds elapsed 19.478009164 19.602961508 seconds elapsed 21.347152811 21.352318709 seconds elapsed 24.478121126 24.171088735 seconds elapsed 26.865057442 26.767327618 So performance-wise the numbers are quite similar. Also update zcomp interface to be more aligned with the crypto API. [1] http://marc.info/?l=linux-kernel&m=144480832108927&w=2 [2] http://marc.info/?l=linux-kernel&m=145379613507518&w=2 [3] https://github.com/sergey-senozhatsky/zram-perf-test Link: http://lkml.kernel.org/r/20160531122017.2878-3-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Suggested-by: Minchan Kim Suggested-by: Joonsoo Kim Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/Kconfig | 10 +++--- drivers/block/zram/zcomp.c | 76 ++++++++++++++++++++++++++----------------- drivers/block/zram/zcomp.h | 17 ++++------ drivers/block/zram/zram_drv.c | 18 ++++++---- 4 files changed, 69 insertions(+), 52 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 386ba3d..2252cd7 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -1,8 +1,7 @@ config ZRAM tristate "Compressed RAM block device support" - depends on BLOCK && SYSFS && ZSMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS + depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO + select CRYPTO_LZO default n help Creates virtual block devices called /dev/zramX (X = 0, 1, ...). @@ -18,9 +17,8 @@ config ZRAM config ZRAM_LZ4_COMPRESS bool "Enable LZ4 algorithm support" depends on ZRAM - select LZ4_COMPRESS - select LZ4_DECOMPRESS + select CRYPTO_LZ4 default n help This option enables LZ4 compression algorithm support. Compression - algorithm can be changed using `comp_algorithm' device attribute. \ No newline at end of file + algorithm can be changed using `comp_algorithm' device attribute. diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 400f826..f357268 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -14,42 +14,39 @@ #include #include #include +#include #include "zcomp.h" -#include "zcomp_lzo.h" -#ifdef CONFIG_ZRAM_LZ4_COMPRESS -#include "zcomp_lz4.h" -#endif -static struct zcomp_backend *backends[] = { - &zcomp_lzo, +static const char * const backends[] = { + "lzo", #ifdef CONFIG_ZRAM_LZ4_COMPRESS - &zcomp_lz4, + "lz4", #endif NULL }; -static struct zcomp_backend *find_backend(const char *compress) +static const char *find_backend(const char *compress) { int i = 0; while (backends[i]) { - if (sysfs_streq(compress, backends[i]->name)) + if (sysfs_streq(compress, backends[i])) break; i++; } return backends[i]; } -static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) +static void zcomp_strm_free(struct zcomp_strm *zstrm) { - if (zstrm->private) - comp->backend->destroy(zstrm->private); + if (!IS_ERR_OR_NULL(zstrm->tfm)) + crypto_free_comp(zstrm->tfm); free_pages((unsigned long)zstrm->buffer, 1); kfree(zstrm); } /* - * allocate new zcomp_strm structure with ->private initialized by + * allocate new zcomp_strm structure with ->tfm initialized by * backend, return NULL on error */ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags) @@ -58,14 +55,14 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags) if (!zstrm) return NULL; - zstrm->private = comp->backend->create(flags); + zstrm->tfm = crypto_alloc_comp(comp->name, 0, 0); /* * allocate 2 pages. 1 for compressed data, plus 1 extra for the * case when compressed size is larger than the original one */ zstrm->buffer = (void *)__get_free_pages(flags | __GFP_ZERO, 1); - if (!zstrm->private || !zstrm->buffer) { - zcomp_strm_free(comp, zstrm); + if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) { + zcomp_strm_free(zstrm); zstrm = NULL; } return zstrm; @@ -78,12 +75,12 @@ ssize_t zcomp_available_show(const char *comp, char *buf) int i = 0; while (backends[i]) { - if (!strcmp(comp, backends[i]->name)) + if (!strcmp(comp, backends[i])) sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, - "[%s] ", backends[i]->name); + "[%s] ", backends[i]); else sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, - "%s ", backends[i]->name); + "%s ", backends[i]); i++; } sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n"); @@ -105,17 +102,38 @@ void zcomp_stream_put(struct zcomp *comp) put_cpu_ptr(comp->stream); } -int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, - const unsigned char *src, size_t *dst_len) +int zcomp_compress(struct zcomp_strm *zstrm, + const void *src, unsigned int *dst_len) { - return comp->backend->compress(src, zstrm->buffer, dst_len, - zstrm->private); + /* + * Our dst memory (zstrm->buffer) is always `2 * PAGE_SIZE' sized + * because sometimes we can endup having a bigger compressed data + * due to various reasons: for example compression algorithms tend + * to add some padding to the compressed buffer. Speaking of padding, + * comp algorithm `842' pads the compressed length to multiple of 8 + * and returns -ENOSP when the dst memory is not big enough, which + * is not something that ZRAM wants to see. We can handle the + * `compressed_size > PAGE_SIZE' case easily in ZRAM, but when we + * receive -ERRNO from the compressing backend we can't help it + * anymore. To make `842' happy we need to tell the exact size of + * the dst buffer, zram_drv will take care of the fact that + * compressed buffer is too big. + */ + *dst_len = PAGE_SIZE * 2; + + return crypto_comp_compress(zstrm->tfm, + src, PAGE_SIZE, + zstrm->buffer, dst_len); } -int zcomp_decompress(struct zcomp *comp, const unsigned char *src, - size_t src_len, unsigned char *dst) +int zcomp_decompress(struct zcomp_strm *zstrm, + const void *src, unsigned int src_len, void *dst) { - return comp->backend->decompress(src, src_len, dst); + unsigned int dst_len = PAGE_SIZE; + + return crypto_comp_decompress(zstrm->tfm, + src, src_len, + dst, &dst_len); } static int __zcomp_cpu_notifier(struct zcomp *comp, @@ -138,7 +156,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, case CPU_UP_CANCELED: zstrm = *per_cpu_ptr(comp->stream, cpu); if (!IS_ERR_OR_NULL(zstrm)) - zcomp_strm_free(comp, zstrm); + zcomp_strm_free(zstrm); *per_cpu_ptr(comp->stream, cpu) = NULL; break; default: @@ -209,7 +227,7 @@ void zcomp_destroy(struct zcomp *comp) struct zcomp *zcomp_create(const char *compress) { struct zcomp *comp; - struct zcomp_backend *backend; + const char *backend; int error; backend = find_backend(compress); @@ -220,7 +238,7 @@ struct zcomp *zcomp_create(const char *compress) if (!comp) return ERR_PTR(-ENOMEM); - comp->backend = backend; + comp->name = backend; error = zcomp_init(comp); if (error) { kfree(comp); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 944b8e6..c914ab7 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -13,12 +13,7 @@ struct zcomp_strm { /* compression/decompression buffer */ void *buffer; - /* - * The private data of the compression stream, only compression - * stream backend can touch this (e.g. compression algorithm - * working memory) - */ - void *private; + struct crypto_comp *tfm; }; /* static compression backend */ @@ -40,6 +35,8 @@ struct zcomp { struct zcomp_strm * __percpu *stream; struct zcomp_backend *backend; struct notifier_block notifier; + + const char *name; }; ssize_t zcomp_available_show(const char *comp, char *buf); @@ -51,11 +48,11 @@ void zcomp_destroy(struct zcomp *comp); struct zcomp_strm *zcomp_stream_get(struct zcomp *comp); void zcomp_stream_put(struct zcomp *comp); -int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, - const unsigned char *src, size_t *dst_len); +int zcomp_compress(struct zcomp_strm *zstrm, + const void *src, unsigned int *dst_len); -int zcomp_decompress(struct zcomp *comp, const unsigned char *src, - size_t src_len, unsigned char *dst); +int zcomp_decompress(struct zcomp_strm *zstrm, + const void *src, unsigned int src_len, void *dst); bool zcomp_set_max_streams(struct zcomp *comp, int num_strm); #endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9361a5d..65d1403 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -563,7 +563,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; - size_t size; + unsigned int size; bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); handle = meta->table[index].handle; @@ -576,10 +576,14 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); - if (size == PAGE_SIZE) + if (size == PAGE_SIZE) { copy_page(mem, cmem); - else - ret = zcomp_decompress(zram->comp, cmem, size, mem); + } else { + struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); + + ret = zcomp_decompress(zstrm, cmem, size, mem); + zcomp_stream_put(zram->comp); + } zs_unmap_object(meta->mem_pool, handle); bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); @@ -646,7 +650,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset) { int ret = 0; - size_t clen; + unsigned int clen; unsigned long handle = 0; struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; @@ -696,7 +700,7 @@ compress_again: } zstrm = zcomp_stream_get(zram->comp); - ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); + ret = zcomp_compress(zstrm, uncmem, &clen); if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; @@ -744,7 +748,7 @@ compress_again: if (handle) goto compress_again; - pr_err("Error allocating memory for compressed page: %u, size=%zu\n", + pr_err("Error allocating memory for compressed page: %u, size=%u\n", index, clen); ret = -ENOMEM; goto out; -- cgit v1.1 From 415403be37e204632b17bdb6857890fe5a220cea Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 Jul 2016 15:22:48 -0700 Subject: zram: use crypto api to check alg availability There is no way to get a string with all the crypto comp algorithms supported by the crypto comp engine, so we need to maintain our own backends list. At the same time we additionally need to use crypto_has_comp() to make sure that the user has requested a compression algorithm that is recognized by the crypto comp engine. Relying on /proc/crypto is not an options here, because it does not show not-yet-inserted compression modules. Example: modprobe zram cat /proc/crypto | grep -i lz4 modprobe lz4 cat /proc/crypto | grep -i lz4 name : lz4 driver : lz4-generic module : lz4 So the user can't tell exactly if the lz4 is really supported from /proc/crypto output, unless someone or something has loaded it. This patch also adds crypto_has_comp() to zcomp_available_show(). We store all the compression algorithms names in zcomp's `backends' array, regardless the CONFIG_CRYPTO_FOO configuration, but show only those that are also supported by crypto engine. This helps user to know the exact list of compression algorithms that can be used. Example: module lz4 is not loaded yet, but is supported by the crypto engine. /proc/crypto has no information on this module, while zram's `comp_algorithm' lists it: cat /proc/crypto | grep -i lz4 cat /sys/block/zram0/comp_algorithm [lzo] lz4 deflate lz4hc 842 We still use the `backends' array to determine if the requested compression backend is known to crypto api. This array, however, may not contain some entries, therefore as the last step we call crypto_has_comp() function which attempts to insmod the requested compression algorithm to determine if crypto api supports it. The advantage of this method is that now we permit the usage of out-of-tree crypto compression modules (implementing S/W or H/W compression). [sergey.senozhatsky@gmail.com: zram-use-crypto-api-to-check-alg-availability-v3] Link: http://lkml.kernel.org/r/20160604024902.11778-4-sergey.senozhatsky@gmail.com Link: http://lkml.kernel.org/r/20160531122017.2878-5-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Joonsoo Kim Signed-off-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zcomp.c | 61 ++++++++++++++++++++++++++----------------- drivers/block/zram/zram_drv.c | 16 +++++++----- drivers/block/zram/zram_drv.h | 5 ++-- 3 files changed, 49 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index f357268..a2b4eb8 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -26,17 +26,6 @@ static const char * const backends[] = { NULL }; -static const char *find_backend(const char *compress) -{ - int i = 0; - while (backends[i]) { - if (sysfs_streq(compress, backends[i])) - break; - i++; - } - return backends[i]; -} - static void zcomp_strm_free(struct zcomp_strm *zstrm) { if (!IS_ERR_OR_NULL(zstrm->tfm)) @@ -68,30 +57,56 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags) return zstrm; } +bool zcomp_available_algorithm(const char *comp) +{ + int i = 0; + + while (backends[i]) { + if (sysfs_streq(comp, backends[i])) + return true; + i++; + } + + /* + * Crypto does not ignore a trailing new line symbol, + * so make sure you don't supply a string containing + * one. + * This also means that we permit zcomp initialisation + * with any compressing algorithm known to crypto api. + */ + return crypto_has_comp(comp, 0, 0) == 1; +} + /* show available compressors */ ssize_t zcomp_available_show(const char *comp, char *buf) { + bool known_algorithm = false; ssize_t sz = 0; int i = 0; - while (backends[i]) { - if (!strcmp(comp, backends[i])) + for (; backends[i]; i++) { + if (!strcmp(comp, backends[i])) { + known_algorithm = true; sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "[%s] ", backends[i]); - else + } else { sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "%s ", backends[i]); - i++; + } } + + /* + * Out-of-tree module known to crypto api or a missing + * entry in `backends'. + */ + if (!known_algorithm && crypto_has_comp(comp, 0, 0) == 1) + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "[%s] ", comp); + sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n"); return sz; } -bool zcomp_available_algorithm(const char *comp) -{ - return find_backend(comp) != NULL; -} - struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { return *get_cpu_ptr(comp->stream); @@ -227,18 +242,16 @@ void zcomp_destroy(struct zcomp *comp) struct zcomp *zcomp_create(const char *compress) { struct zcomp *comp; - const char *backend; int error; - backend = find_backend(compress); - if (!backend) + if (!zcomp_available_algorithm(compress)) return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); if (!comp) return ERR_PTR(-ENOMEM); - comp->name = backend; + comp->name = compress; error = zcomp_init(comp); if (error) { kfree(comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 65d1403..c2a1d7d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -342,9 +342,16 @@ static ssize_t comp_algorithm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); + char compressor[CRYPTO_MAX_ALG_NAME]; size_t sz; - if (!zcomp_available_algorithm(buf)) + strlcpy(compressor, buf, sizeof(compressor)); + /* ignore trailing newline */ + sz = strlen(compressor); + if (sz > 0 && compressor[sz - 1] == '\n') + compressor[sz - 1] = 0x00; + + if (!zcomp_available_algorithm(compressor)) return -EINVAL; down_write(&zram->init_lock); @@ -353,13 +360,8 @@ static ssize_t comp_algorithm_store(struct device *dev, pr_info("Can't change algorithm for initialized device\n"); return -EBUSY; } - strlcpy(zram->compressor, buf, sizeof(zram->compressor)); - - /* ignore trailing newline */ - sz = strlen(zram->compressor); - if (sz > 0 && zram->compressor[sz - 1] == '\n') - zram->compressor[sz - 1] = 0x00; + strlcpy(zram->compressor, compressor, sizeof(compressor)); up_write(&zram->init_lock); return len; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 3f5bf66..74fcf10 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -15,8 +15,9 @@ #ifndef _ZRAM_DRV_H_ #define _ZRAM_DRV_H_ -#include +#include #include +#include #include "zcomp.h" @@ -113,7 +114,7 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - char compressor[10]; + char compressor[CRYPTO_MAX_ALG_NAME]; /* * zram is claimed so open request will be failed */ -- cgit v1.1 From ce1ed9f98e888aa220fb09da2e2bcfcfba218a27 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 Jul 2016 15:22:54 -0700 Subject: zram: delete custom lzo/lz4 Remove lzo/lz4 backends, we use crypto API now. [sergey.senozhatsky@gmail.com: zram-delete-custom-lzo-lz4-v3] Link: http://lkml.kernel.org/r/20160604024902.11778-6-sergey.senozhatsky@gmail.com Link: http://lkml.kernel.org/r/20160531122017.2878-7-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/Kconfig | 9 ------- drivers/block/zram/Makefile | 4 +-- drivers/block/zram/zcomp.c | 2 +- drivers/block/zram/zcomp.h | 15 ----------- drivers/block/zram/zcomp_lz4.c | 56 ------------------------------------------ drivers/block/zram/zcomp_lz4.h | 17 ------------- drivers/block/zram/zcomp_lzo.c | 56 ------------------------------------------ drivers/block/zram/zcomp_lzo.h | 17 ------------- 8 files changed, 2 insertions(+), 174 deletions(-) delete mode 100644 drivers/block/zram/zcomp_lz4.c delete mode 100644 drivers/block/zram/zcomp_lz4.h delete mode 100644 drivers/block/zram/zcomp_lzo.c delete mode 100644 drivers/block/zram/zcomp_lzo.h (limited to 'drivers') diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 2252cd7..b8ecba6 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -13,12 +13,3 @@ config ZRAM disks and maybe many more. See zram.txt for more information. - -config ZRAM_LZ4_COMPRESS - bool "Enable LZ4 algorithm support" - depends on ZRAM - select CRYPTO_LZ4 - default n - help - This option enables LZ4 compression algorithm support. Compression - algorithm can be changed using `comp_algorithm' device attribute. diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index be0763f..9e2b79e 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,5 +1,3 @@ -zram-y := zcomp_lzo.o zcomp.o zram_drv.o - -zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o +zram-y := zcomp.o zram_drv.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index a2b4eb8..9ab45d4 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -20,7 +20,7 @@ static const char * const backends[] = { "lzo", -#ifdef CONFIG_ZRAM_LZ4_COMPRESS +#if IS_ENABLED(CONFIG_CRYPTO_LZ4) "lz4", #endif NULL diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index c914ab7..478cac2 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -16,24 +16,9 @@ struct zcomp_strm { struct crypto_comp *tfm; }; -/* static compression backend */ -struct zcomp_backend { - int (*compress)(const unsigned char *src, unsigned char *dst, - size_t *dst_len, void *private); - - int (*decompress)(const unsigned char *src, size_t src_len, - unsigned char *dst); - - void *(*create)(gfp_t flags); - void (*destroy)(void *private); - - const char *name; -}; - /* dynamic per-device compression frontend */ struct zcomp { struct zcomp_strm * __percpu *stream; - struct zcomp_backend *backend; struct notifier_block notifier; const char *name; diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c deleted file mode 100644 index 0110086..0000000 --- a/drivers/block/zram/zcomp_lz4.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include - -#include "zcomp_lz4.h" - -static void *zcomp_lz4_create(gfp_t flags) -{ - void *ret; - - ret = kmalloc(LZ4_MEM_COMPRESS, flags); - if (!ret) - ret = __vmalloc(LZ4_MEM_COMPRESS, - flags | __GFP_HIGHMEM, - PAGE_KERNEL); - return ret; -} - -static void zcomp_lz4_destroy(void *private) -{ - kvfree(private); -} - -static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst, - size_t *dst_len, void *private) -{ - /* return : Success if return 0 */ - return lz4_compress(src, PAGE_SIZE, dst, dst_len, private); -} - -static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len, - unsigned char *dst) -{ - size_t dst_len = PAGE_SIZE; - /* return : Success if return 0 */ - return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len); -} - -struct zcomp_backend zcomp_lz4 = { - .compress = zcomp_lz4_compress, - .decompress = zcomp_lz4_decompress, - .create = zcomp_lz4_create, - .destroy = zcomp_lz4_destroy, - .name = "lz4", -}; diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h deleted file mode 100644 index 60613fb..0000000 --- a/drivers/block/zram/zcomp_lz4.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ZCOMP_LZ4_H_ -#define _ZCOMP_LZ4_H_ - -#include "zcomp.h" - -extern struct zcomp_backend zcomp_lz4; - -#endif /* _ZCOMP_LZ4_H_ */ diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c deleted file mode 100644 index ed7a1f0..0000000 --- a/drivers/block/zram/zcomp_lzo.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include - -#include "zcomp_lzo.h" - -static void *lzo_create(gfp_t flags) -{ - void *ret; - - ret = kmalloc(LZO1X_MEM_COMPRESS, flags); - if (!ret) - ret = __vmalloc(LZO1X_MEM_COMPRESS, - flags | __GFP_HIGHMEM, - PAGE_KERNEL); - return ret; -} - -static void lzo_destroy(void *private) -{ - kvfree(private); -} - -static int lzo_compress(const unsigned char *src, unsigned char *dst, - size_t *dst_len, void *private) -{ - int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private); - return ret == LZO_E_OK ? 0 : ret; -} - -static int lzo_decompress(const unsigned char *src, size_t src_len, - unsigned char *dst) -{ - size_t dst_len = PAGE_SIZE; - int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len); - return ret == LZO_E_OK ? 0 : ret; -} - -struct zcomp_backend zcomp_lzo = { - .compress = lzo_compress, - .decompress = lzo_decompress, - .create = lzo_create, - .destroy = lzo_destroy, - .name = "lzo", -}; diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h deleted file mode 100644 index 128c580..0000000 --- a/drivers/block/zram/zcomp_lzo.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2014 Sergey Senozhatsky. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ZCOMP_LZO_H_ -#define _ZCOMP_LZO_H_ - -#include "zcomp.h" - -extern struct zcomp_backend zcomp_lzo; - -#endif /* _ZCOMP_LZO_H_ */ -- cgit v1.1 From eb9f56d82547db407779967a2251ea28969245b0 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 Jul 2016 15:22:56 -0700 Subject: zram: add more compression algorithms Add "deflate", "lz4hc", "842" algorithms to the list of known compression backends. The real availability of those algorithms, however, depends on the corresponding CONFIG_CRYPTO_FOO config options. [sergey.senozhatsky@gmail.com: zram-add-more-compression-algorithms-v3] Link: http://lkml.kernel.org/r/20160604024902.11778-7-sergey.senozhatsky@gmail.com Link: http://lkml.kernel.org/r/20160531122017.2878-8-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zcomp.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 9ab45d4..32e521a 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -23,6 +23,15 @@ static const char * const backends[] = { #if IS_ENABLED(CONFIG_CRYPTO_LZ4) "lz4", #endif +#if IS_ENABLED(CONFIG_CRYPTO_DEFLATE) + "deflate", +#endif +#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC) + "lz4hc", +#endif +#if IS_ENABLED(CONFIG_CRYPTO_842) + "842", +#endif NULL }; -- cgit v1.1 From 16d37725a042cc66f9ee95889dd40e734264508e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 26 Jul 2016 15:22:59 -0700 Subject: zram: drop gfp_t from zcomp_strm_alloc() We now allocate streams from CPU_UP hot-plug path, there are no context-dependent stream allocations anymore and we can schedule from zcomp_strm_alloc(). Use GFP_KERNEL directly and drop a gfp_t parameter. Link: http://lkml.kernel.org/r/20160531122017.2878-9-sergey.senozhatsky@gmail.com Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zcomp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 32e521a..4b5cd3a 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -47,9 +47,9 @@ static void zcomp_strm_free(struct zcomp_strm *zstrm) * allocate new zcomp_strm structure with ->tfm initialized by * backend, return NULL on error */ -static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags) +static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) { - struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), flags); + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); if (!zstrm) return NULL; @@ -58,7 +58,7 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp, gfp_t flags) * allocate 2 pages. 1 for compressed data, plus 1 extra for the * case when compressed size is larger than the original one */ - zstrm->buffer = (void *)__get_free_pages(flags | __GFP_ZERO, 1); + zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); if (IS_ERR_OR_NULL(zstrm->tfm) || !zstrm->buffer) { zcomp_strm_free(zstrm); zstrm = NULL; @@ -169,7 +169,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, case CPU_UP_PREPARE: if (WARN_ON(*per_cpu_ptr(comp->stream, cpu))) break; - zstrm = zcomp_strm_alloc(comp, GFP_KERNEL); + zstrm = zcomp_strm_alloc(comp); if (IS_ERR_OR_NULL(zstrm)) { pr_err("Can't allocate a compression stream\n"); return NOTIFY_BAD; -- cgit v1.1 From b1123ea6d3b3da25af5c8a9d843bd07ab63213f4 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 26 Jul 2016 15:23:09 -0700 Subject: mm: balloon: use general non-lru movable page feature Now, VM has a feature to migrate non-lru movable pages so balloon doesn't need custom migration hooks in migrate.c and compaction.c. Instead, this patch implements the page->mapping->a_ops-> {isolate|migrate|putback} functions. With that, we could remove hooks for ballooning in general migration functions and make balloon compaction simple. [akpm@linux-foundation.org: compaction.h requires that the includer first include node.h] Link: http://lkml.kernel.org/r/1464736881-24886-4-git-send-email-minchan@kernel.org Signed-off-by: Gioh Kim Signed-off-by: Minchan Kim Acked-by: Vlastimil Babka Cc: Rafael Aquini Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_balloon.c | 54 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 476c0e3..88d5609 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * Balloon device works in 4K page units. So each page is pointed to by @@ -45,6 +46,10 @@ static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; module_param(oom_pages, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); +#ifdef CONFIG_BALLOON_COMPACTION +static struct vfsmount *balloon_mnt; +#endif + struct virtio_balloon { struct virtio_device *vdev; struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; @@ -488,8 +493,26 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, put_page(page); /* balloon reference */ - return MIGRATEPAGE_SUCCESS; + return 0; } + +static struct dentry *balloon_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + static const struct dentry_operations ops = { + .d_dname = simple_dname, + }; + + return mount_pseudo(fs_type, "balloon-kvm:", NULL, &ops, + BALLOON_KVM_MAGIC); +} + +static struct file_system_type balloon_fs = { + .name = "balloon-kvm", + .mount = balloon_mount, + .kill_sb = kill_anon_super, +}; + #endif /* CONFIG_BALLOON_COMPACTION */ static int virtballoon_probe(struct virtio_device *vdev) @@ -519,9 +542,6 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vdev = vdev; balloon_devinfo_init(&vb->vb_dev_info); -#ifdef CONFIG_BALLOON_COMPACTION - vb->vb_dev_info.migratepage = virtballoon_migratepage; -#endif err = init_vqs(vb); if (err) @@ -531,13 +551,33 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; err = register_oom_notifier(&vb->nb); if (err < 0) - goto out_oom_notify; + goto out_del_vqs; + +#ifdef CONFIG_BALLOON_COMPACTION + balloon_mnt = kern_mount(&balloon_fs); + if (IS_ERR(balloon_mnt)) { + err = PTR_ERR(balloon_mnt); + unregister_oom_notifier(&vb->nb); + goto out_del_vqs; + } + + vb->vb_dev_info.migratepage = virtballoon_migratepage; + vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); + if (IS_ERR(vb->vb_dev_info.inode)) { + err = PTR_ERR(vb->vb_dev_info.inode); + kern_unmount(balloon_mnt); + unregister_oom_notifier(&vb->nb); + vb->vb_dev_info.inode = NULL; + goto out_del_vqs; + } + vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; +#endif virtio_device_ready(vdev); return 0; -out_oom_notify: +out_del_vqs: vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -571,6 +611,8 @@ static void virtballoon_remove(struct virtio_device *vdev) cancel_work_sync(&vb->update_balloon_stats_work); remove_common(vb); + if (vb->vb_dev_info.inode) + iput(vb->vb_dev_info.inode); kfree(vb); } -- cgit v1.1 From 9bc482d3460501ac809457af26b46b72cd7dc212 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 26 Jul 2016 15:23:34 -0700 Subject: zram: use __GFP_MOVABLE for memory allocation Zsmalloc is ready for page migration so zram can use __GFP_MOVABLE from now on. I did test to see how it helps to make higher order pages. Test scenario is as follows. KVM guest, 1G memory, ext4 formated zram block device, for i in `seq 1 8`; do dd if=/dev/vda1 of=mnt/test$i.txt bs=128M count=1 & done wait `pidof dd` for i in `seq 1 2 8`; do rm -rf mnt/test$i.txt done fstrim -v mnt echo "init" cat /proc/buddyinfo echo "compaction" echo 1 > /proc/sys/vm/compact_memory cat /proc/buddyinfo old: init Node 0, zone DMA 208 120 51 41 11 0 0 0 0 0 0 Node 0, zone DMA32 16380 13777 9184 3805 789 54 3 0 0 0 0 compaction Node 0, zone DMA 132 82 40 39 16 2 1 0 0 0 0 Node 0, zone DMA32 5219 5526 4969 3455 1831 677 139 15 0 0 0 new: init Node 0, zone DMA 379 115 97 19 2 0 0 0 0 0 0 Node 0, zone DMA32 18891 16774 10862 3947 637 21 0 0 0 0 0 compaction Node 0, zone DMA 214 66 87 29 10 3 0 0 0 0 0 Node 0, zone DMA32 1612 3139 3154 2469 1745 990 384 94 7 0 0 As you can see, compaction made so many high-order pages. Yay! Link: http://lkml.kernel.org/r/1464736881-24886-13-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Reviewed-by: Sergey Senozhatsky Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c2a1d7d..9e2a83c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -738,7 +738,8 @@ compress_again: handle = zs_malloc(meta->mem_pool, clen, __GFP_KSWAPD_RECLAIM | __GFP_NOWARN | - __GFP_HIGHMEM); + __GFP_HIGHMEM | + __GFP_MOVABLE); if (!handle) { zcomp_stream_put(zram->comp); zstrm = NULL; @@ -746,7 +747,8 @@ compress_again: atomic64_inc(&zram->stats.writestall); handle = zs_malloc(meta->mem_pool, clen, - GFP_NOIO | __GFP_HIGHMEM); + GFP_NOIO | __GFP_HIGHMEM | + __GFP_MOVABLE); if (handle) goto compress_again; -- cgit v1.1 From 8ea1d2a1985a7ae096edf5850a31d844ad1b8e97 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 26 Jul 2016 15:24:42 -0700 Subject: mm, frontswap: convert frontswap_enabled to static key I have noticed that frontswap.h first declares "frontswap_enabled" as extern bool variable, and then overrides it with "#define frontswap_enabled (1)" for CONFIG_FRONTSWAP=Y or (0) when disabled. The bool variable isn't actually instantiated anywhere. This all looks like an unfinished attempt to make frontswap_enabled reflect whether a backend is instantiated. But in the current state, all frontswap hooks call unconditionally into frontswap.c just to check if frontswap_ops is non-NULL. This should at least be checked inline, but we can further eliminate the overhead when CONFIG_FRONTSWAP is enabled and no backend registered, using a static key that is initially disabled, and gets enabled only upon first backend registration. Thus, checks for "frontswap_enabled" are replaced with "frontswap_enabled()" wrapping the static key check. There are two exceptions: - xen's selfballoon_process() was testing frontswap_enabled in code guarded by #ifdef CONFIG_FRONTSWAP, which was effectively always true when reachable. The patch just removes this check. Using frontswap_enabled() does not sound correct here, as this can be true even without xen's own backend being registered. - in SYSCALL_DEFINE2(swapon), change the check to IS_ENABLED(CONFIG_FRONTSWAP) as it seems the bitmap allocation cannot currently be postponed until a backend is registered. This means that frontswap will still have some memory overhead by being configured, but without a backend. After the patch, we can expect that some functions in frontswap.c are called only when frontswap_ops is non-NULL. Change the checks there to VM_BUG_ONs. While at it, convert other BUG_ONs to VM_BUG_ONs as frontswap has been stable for some time. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1463152235-9717-1-git-send-email-vbabka@suse.cz Signed-off-by: Vlastimil Babka Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: Juergen Gross Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/xen/xen-selfballoon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 53a085f..6662071 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -195,7 +195,7 @@ static void selfballoon_process(struct work_struct *work) MB2PAGES(selfballoon_reserved_mb); #ifdef CONFIG_FRONTSWAP /* allow space for frontswap pages to be repatriated */ - if (frontswap_selfshrinking && frontswap_enabled) + if (frontswap_selfshrinking) goal_pages += frontswap_curr_pages(); #endif if (cur_pages > goal_pages) @@ -230,7 +230,7 @@ static void selfballoon_process(struct work_struct *work) reset_timer = true; } #ifdef CONFIG_FRONTSWAP - if (frontswap_selfshrinking && frontswap_enabled) { + if (frontswap_selfshrinking) { frontswap_selfshrink(); reset_timer = true; } -- cgit v1.1 From dcddffd41d3f1d3bdcc1dce3f1cd142779b6d4c1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 26 Jul 2016 15:25:18 -0700 Subject: mm: do not pass mm_struct into handle_mm_fault We always have vma->vm_mm around. Link: http://lkml.kernel.org/r/1466021202-61880-8-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/iommu/amd_iommu_v2.c | 3 +-- drivers/iommu/intel-svm.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 56999d2f..fbdaf81 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -538,8 +538,7 @@ static void do_fault(struct work_struct *work) if (access_error(vma, fault)) goto out; - ret = handle_mm_fault(mm, vma, address, flags); - + ret = handle_mm_fault(vma, address, flags); out: up_read(&mm->mmap_sem); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index d9939fa..8ebb353 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -583,7 +583,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (access_error(vma, req)) goto invalid; - ret = handle_mm_fault(svm->mm, vma, address, + ret = handle_mm_fault(vma, address, req->wr_req ? FAULT_FLAG_WRITE : 0); if (ret & VM_FAULT_ERROR) goto invalid; -- cgit v1.1 From 65c453778aea374a46597f4d9826274d1eaf7338 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 26 Jul 2016 15:26:10 -0700 Subject: mm, rmap: account shmem thp pages Let's add ShmemHugePages and ShmemPmdMapped fields into meminfo and smaps. It indicates how many times we allocate and map shmem THP. NR_ANON_TRANSPARENT_HUGEPAGES is renamed to NR_ANON_THPS. Link: http://lkml.kernel.org/r/1466021202-61880-27-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/base/node.c b/drivers/base/node.c index 560751b..51c7db2 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -113,6 +113,8 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d SUnreclaim: %8lu kB\n" #ifdef CONFIG_TRANSPARENT_HUGEPAGE "Node %d AnonHugePages: %8lu kB\n" + "Node %d ShmemHugePages: %8lu kB\n" + "Node %d ShmemPmdMapped: %8lu kB\n" #endif , nid, K(node_page_state(nid, NR_FILE_DIRTY)), @@ -131,10 +133,13 @@ static ssize_t node_read_meminfo(struct device *dev, node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE - nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)) - , nid, - K(node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) * - HPAGE_PMD_NR)); + nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), + nid, K(node_page_state(nid, NR_ANON_THPS) * + HPAGE_PMD_NR), + nid, K(node_page_state(nid, NR_SHMEM_THPS) * + HPAGE_PMD_NR), + nid, K(node_page_state(nid, NR_SHMEM_PMDMAPPED) * + HPAGE_PMD_NR)); #else nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); #endif -- cgit v1.1 From c01d5b300774d130a24d787825b01eb24e6e20cb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 26 Jul 2016 15:26:15 -0700 Subject: shmem: get_unmapped_area align huge page Provide a shmem_get_unmapped_area method in file_operations, called at mmap time to decide the mapping address. It could be conditional on CONFIG_TRANSPARENT_HUGEPAGE, but save #ifdefs in other places by making it unconditional. shmem_get_unmapped_area() first calls the usual mm->get_unmapped_area (which we treat as a black box, highly dependent on architecture and config and executable layout). Lots of conditions, and in most cases it just goes with the address that chose; but when our huge stars are rightly aligned, yet that did not provide a suitable address, go back to ask for a larger arena, within which to align the mapping suitably. There have to be some direct calls to shmem_get_unmapped_area(), not via the file_operations: because of the way shmem_zero_setup() is called to create a shmem object late in the mmap sequence, when MAP_SHARED is requested with MAP_ANONYMOUS or /dev/zero. Though this only matters when /proc/sys/vm/shmem_huge has been set. Link: http://lkml.kernel.org/r/1466021202-61880-29-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Hugh Dickins Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/mem.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers') diff --git a/drivers/char/mem.c b/drivers/char/mem.c index d633974..a33163d 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -657,6 +658,28 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma) return 0; } +static unsigned long get_unmapped_area_zero(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ +#ifdef CONFIG_MMU + if (flags & MAP_SHARED) { + /* + * mmap_zero() will call shmem_zero_setup() to create a file, + * so use shmem's get_unmapped_area in case it can be huge; + * and pass NULL for file as in mmap.c's get_unmapped_area(), + * so as not to confuse shmem with our handle on "/dev/zero". + */ + return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags); + } + + /* Otherwise flags & MAP_PRIVATE: with no shmem object beneath it */ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +#else + return -ENOSYS; +#endif +} + static ssize_t write_full(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -764,6 +787,7 @@ static const struct file_operations zero_fops = { .read_iter = read_iter_zero, .write_iter = write_iter_zero, .mmap = mmap_zero, + .get_unmapped_area = get_unmapped_area_zero, #ifndef CONFIG_MMU .mmap_capabilities = zero_mmap_capabilities, #endif -- cgit v1.1 From dd4123f324bbaec7618b677b7bce2b11aee9594e Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 26 Jul 2016 15:26:50 -0700 Subject: mm: fix build warnings in Randy reported below build error. > In file included from ../include/linux/balloon_compaction.h:48:0, > from ../mm/balloon_compaction.c:11: > ../include/linux/compaction.h:237:51: warning: 'struct node' declared inside parameter list [enabled by default] > static inline int compaction_register_node(struct node *node) > ../include/linux/compaction.h:237:51: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] > ../include/linux/compaction.h:242:54: warning: 'struct node' declared inside parameter list [enabled by default] > static inline void compaction_unregister_node(struct node *node) > It was caused by non-lru page migration which needs compaction.h but compaction.h doesn't include any header to be standalone. I think proper header for non-lru page migration is migrate.h rather than compaction.h because migrate.h has already headers needed to work non-lru page migration indirectly like isolate_mode_t, migrate_mode MIGRATEPAGE_SUCCESS. [akpm@linux-foundation.org: revert mm-balloon-use-general-non-lru-movable-page-feature-fix.patch temp fix] Link: http://lkml.kernel.org/r/20160610003304.GE29779@bbox Signed-off-by: Minchan Kim Reported-by: Randy Dunlap Cc: Konstantin Khlebnikov Cc: Vlastimil Babka Cc: Gioh Kim Cc: Rafael Aquini Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 88d5609..888d5f8 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -493,7 +493,7 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, put_page(page); /* balloon reference */ - return 0; + return MIGRATEPAGE_SUCCESS; } static struct dentry *balloon_mount(struct file_system_type *fs_type, -- cgit v1.1