From bea41197ead3e03308bdd10c11db3ce91ae5c8ab Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 25 Jun 2015 14:59:39 -0700 Subject: s390/mm: make hugepages_supported a boot time decision There is a potential bug with KVM and hugetlbfs if the hardware does not support hugepages (EDAT1). We fix this by making EDAT1 a hard requirement for hugepages and therefore removing and simplifying code. As s390, with the sw-emulated hugepages, was the only user of arch_prepare/release_hugepage I also removed theses calls from common and other architecture code. This patch (of 5): By dropping support for hugepages on machines which do not have the hardware feature EDAT1, we fix a potential s390 KVM bug. The bug would happen if a guest is backed by hugetlbfs (not supported currently), but does not get pagetables with PGSTE. This would lead to random memory overwrites. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/page.h | 8 ++++---- arch/s390/kernel/setup.c | 2 ++ arch/s390/mm/pgtable.c | 2 ++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd..0844b78 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -17,7 +17,10 @@ #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) -#define HPAGE_SHIFT 20 +#include +#ifndef __ASSEMBLY__ + +extern unsigned int HPAGE_SHIFT; #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) @@ -27,9 +30,6 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH -#include -#ifndef __ASSEMBLY__ - static inline void storage_key_init_range(unsigned long start, unsigned long end) { #if PAGE_DEFAULT_KEY diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index af4f41d..73941bf 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -880,6 +880,8 @@ void __init setup_arch(char **cmdline_p) */ setup_hwcaps(); + HPAGE_SHIFT = MACHINE_HAS_HPAGE ? 20 : 0; + /* * Create kernel page tables and switch to virtual addressing. */ diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b33f661..16154720 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -31,6 +31,8 @@ #define ALLOC_ORDER 2 #define FRAG_MASK 0x03 +unsigned int HPAGE_SHIFT; + unsigned long *crst_table_alloc(struct mm_struct *mm) { struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); -- cgit v1.1 From 8408427e6b9f37bc5ce09933947e670f87568b77 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 25 Jun 2015 14:59:42 -0700 Subject: mm/hugetlb: remove unused arch hook prepare/release_hugepage With s390 dropping support for emulated hugepages, the last user of arch_prepare_hugepage and arch_release_hugepage is gone. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 75c0eef..a8c3087 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -975,7 +975,6 @@ static void update_and_free_page(struct hstate *h, struct page *page) destroy_compound_gigantic_page(page, huge_page_order(h)); free_gigantic_page(page, huge_page_order(h)); } else { - arch_release_hugepage(page); __free_pages(page, huge_page_order(h)); } } @@ -1160,10 +1159,6 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); if (page) { - if (arch_prepare_hugepage(page)) { - __free_pages(page, huge_page_order(h)); - return NULL; - } prep_new_huge_page(h, page, nid); } @@ -1315,11 +1310,6 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); - if (page && arch_prepare_hugepage(page)) { - __free_pages(page, huge_page_order(h)); - page = NULL; - } - spin_lock(&hugetlb_lock); if (page) { INIT_LIST_HEAD(&page->lru); -- cgit v1.1 From 08bd4fc15683b9a26b9be7048d151f4ddadad96f Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 25 Jun 2015 14:59:44 -0700 Subject: mm/hugetlb: remove arch_prepare/release_hugepage from arch headers Nobody used these hooks so they were removed from common code, and can now be removed from the architectures. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Acked-by: Ralf Baechle Cc: Heiko Carstens Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/hugetlb.h | 9 --------- arch/arm64/include/asm/hugetlb.h | 9 --------- arch/ia64/include/asm/hugetlb.h | 9 --------- arch/metag/include/asm/hugetlb.h | 9 --------- arch/mips/include/asm/hugetlb.h | 9 --------- arch/powerpc/include/asm/hugetlb.h | 9 --------- arch/sh/include/asm/hugetlb.h | 9 --------- arch/sparc/include/asm/hugetlb.h | 9 --------- arch/tile/include/asm/hugetlb.h | 9 --------- arch/x86/include/asm/hugetlb.h | 9 --------- 10 files changed, 90 deletions(-) diff --git a/arch/arm/include/asm/hugetlb.h b/arch/arm/include/asm/hugetlb.h index 31bb7dc..7d26f6c 100644 --- a/arch/arm/include/asm/hugetlb.h +++ b/arch/arm/include/asm/hugetlb.h @@ -63,15 +63,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) return pte_wrprotect(pte); } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 734c17e..2fd9b14 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -96,15 +96,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) return pte_wrprotect(pte); } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h index ff1377b..ef65f02 100644 --- a/arch/ia64/include/asm/hugetlb.h +++ b/arch/ia64/include/asm/hugetlb.h @@ -65,15 +65,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h index f730b39..905ed42 100644 --- a/arch/metag/include/asm/hugetlb.h +++ b/arch/metag/include/asm/hugetlb.h @@ -67,15 +67,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index 4a5bb54..982bc06 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -110,15 +110,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 4bbd3c8..7eac89b 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -168,15 +168,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h index b788a9b..ef489a5 100644 --- a/arch/sh/include/asm/hugetlb.h +++ b/arch/sh/include/asm/hugetlb.h @@ -79,15 +79,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index 3130d76..139e711 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -78,15 +78,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h index 1abd00c..2fac5be 100644 --- a/arch/tile/include/asm/hugetlb.h +++ b/arch/tile/include/asm/hugetlb.h @@ -94,15 +94,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index dab7a3a..f8a29d2 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -80,15 +80,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep) return *ptep; } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { } -- cgit v1.1 From ce415712cf920e08d622f07eaa9f5d1eb7e93919 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 25 Jun 2015 14:59:47 -0700 Subject: s390/hugetlb: remove dead code for sw emulated huge pages We now support only hugepages on hardware with EDAT1 support. So we remove the prepare/release_hugepage hooks and simplify set_huge_pte_at and huge_ptep_get. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/hugetlb.h | 3 --- arch/s390/mm/hugetlbpage.c | 60 +++-------------------------------------- 2 files changed, 3 insertions(+), 60 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index dfb542a..0130d03 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -37,9 +37,6 @@ static inline int prepare_hugepage_range(struct file *file, #define arch_clear_hugepage_flags(page) do { } while (0) -int arch_prepare_hugepage(struct page *page); -void arch_release_hugepage(struct page *page); - static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c3f8e3d..3bbfd4f 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -86,31 +86,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - pmd_t pmd; + pmd_t pmd = __pte_to_pmd(pte); - pmd = __pte_to_pmd(pte); - if (!MACHINE_HAS_HPAGE) { - /* Emulated huge ptes loose the dirty and young bit */ - pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; - pmd_val(pmd) |= pte_page(pte)[1].index; - } else - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; *(pmd_t *) ptep = pmd; } pte_t huge_ptep_get(pte_t *ptep) { - unsigned long origin; - pmd_t pmd; + pmd_t pmd = *(pmd_t *) ptep; - pmd = *(pmd_t *) ptep; - if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { - origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; - pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; - pmd_val(pmd) |= *(unsigned long *) origin; - /* Emulated huge ptes are young and dirty by definition */ - pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY; - } return __pmd_to_pte(pmd); } @@ -125,45 +110,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, return pte; } -int arch_prepare_hugepage(struct page *page) -{ - unsigned long addr = page_to_phys(page); - pte_t pte; - pte_t *ptep; - int i; - - if (MACHINE_HAS_HPAGE) - return 0; - - ptep = (pte_t *) pte_alloc_one(&init_mm, addr); - if (!ptep) - return -ENOMEM; - - pte_val(pte) = addr; - for (i = 0; i < PTRS_PER_PTE; i++) { - set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte); - pte_val(pte) += PAGE_SIZE; - } - page[1].index = (unsigned long) ptep; - return 0; -} - -void arch_release_hugepage(struct page *page) -{ - pte_t *ptep; - - if (MACHINE_HAS_HPAGE) - return; - - ptep = (pte_t *) page[1].index; - if (!ptep) - return; - clear_table((unsigned long *) ptep, _PAGE_INVALID, - PTRS_PER_PTE * sizeof(pte_t)); - page_table_free(&init_mm, (unsigned long *) ptep); - page[1].index = 0; -} - pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { -- cgit v1.1 From cbd7d9c2b70f5e2fc78e0c90b3034b94dca6c82b Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 25 Jun 2015 14:59:49 -0700 Subject: s390/mm: forward check for huge pmds to pmd_large() We already do the check in pmd_large, so we can just forward the call. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/hugetlbpage.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 3bbfd4f..fb4bf2c 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -141,10 +141,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) int pmd_huge(pmd_t pmd) { - if (!MACHINE_HAS_HPAGE) - return 0; - - return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); + return pmd_large(pmd); } int pud_huge(pud_t pud) -- cgit v1.1 From cf54e2fce51c7ad2479fe8cf213a2ed618a8189b Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 25 Jun 2015 14:59:52 -0700 Subject: s390/mm: change HPAGE_SHIFT type to int With making HPAGE_SHIFT an unsigned integer we also accidentally changed pageblock_order. In order to avoid compiler warnings we make HPAGE_SHFIT an int again. Signed-off-by: Dominik Dingel Suggested-by: Andrew Morton Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/include/asm/page.h | 2 +- arch/s390/mm/pgtable.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 0844b78..dd34523 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -20,7 +20,7 @@ #include #ifndef __ASSEMBLY__ -extern unsigned int HPAGE_SHIFT; +extern int HPAGE_SHIFT; #define HPAGE_SIZE (1UL << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 16154720..33082d0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -31,7 +31,7 @@ #define ALLOC_ORDER 2 #define FRAG_MASK 0x03 -unsigned int HPAGE_SHIFT; +int HPAGE_SHIFT; unsigned long *crst_table_alloc(struct mm_struct *mm) { -- cgit v1.1 From 9e65bf68a8f14f1b3fb16dc85dbbaff79da4bfeb Mon Sep 17 00:00:00 2001 From: Marcin Jabrzyk Date: Thu, 25 Jun 2015 14:59:55 -0700 Subject: zram: remove obsolete ZRAM_DEBUG option This config option doesn't provide any usage for zram. Signed-off-by: Marcin Jabrzyk Acked-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/Kconfig | 10 +--------- drivers/block/zram/zram_drv.c | 4 ---- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 6489c0f..386ba3d 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -23,12 +23,4 @@ config ZRAM_LZ4_COMPRESS default n help This option enables LZ4 compression algorithm support. Compression - algorithm can be changed using `comp_algorithm' device attribute. - -config ZRAM_DEBUG - bool "Compressed RAM block device debug support" - depends on ZRAM - default n - help - This option adds additional debugging code to the compressed - RAM block device driver. + algorithm can be changed using `comp_algorithm' device attribute. \ No newline at end of file diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6e134f4..00e7966 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -15,10 +15,6 @@ #define KMSG_COMPONENT "zram" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt -#ifdef CONFIG_ZRAM_DEBUG -#define DEBUG -#endif - #include #include #include -- cgit v1.1 From 13a18a1c04775e48788a25ba7ed17c885df6b1d1 Mon Sep 17 00:00:00 2001 From: Marcin Jabrzyk Date: Thu, 25 Jun 2015 14:59:58 -0700 Subject: zsmalloc: remove obsolete ZSMALLOC_DEBUG The DEBUG define in zsmalloc is useless, there is no usage of it at all. Signed-off-by: Marcin Jabrzyk Acked-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index a8b5e74..c766240 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -45,10 +45,6 @@ * */ -#ifdef CONFIG_ZSMALLOC_DEBUG -#define DEBUG -#endif - #include #include #include -- cgit v1.1 From 3d8ed88ba7f612305785fc1f3cefa043f817bb3e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:00 -0700 Subject: zram: add `compact` sysfs entry to documentation We currently don't support zram on-demand device creation. The only way to have N zram devices is to specify num_devices module parameter (default value 1). That means that if, for some reason, at some point, user wants to have N + 1 devies he/she must umount all the existing devices, unload the module, load the module passing num_devices equals to N + 1. This patchset introduces zram-control sysfs class, which has two sysfs attrs: - hot_add -- add a new zram device - hot_remove -- remove a specific (device_id) zram device Usage example: # add a new specific zram device cat /sys/class/zram-control/hot_add 1 # remove a specific zram device echo 4 > /sys/class/zram-control/hot_remove This patch (of 10): Briefly describe missing `compact` sysfs entry. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/blockdev/zram.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 48a183e..bef4998 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -126,7 +126,7 @@ mem_used_max RW the maximum amount memory zram have consumed to mem_limit RW the maximum amount of memory ZRAM can use to store the compressed data num_migrated RO the number of objects migrated migrated by compaction - +compact WO trigger memory compaction WARNING ======= -- cgit v1.1 From 3bca3ef7694166b86c91b515818cc5acecd357a7 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:03 -0700 Subject: zram: cosmetic ZRAM_ATTR_RO code formatting tweak Fix a misplaced backslash. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 00e7966..7814e68 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -49,7 +49,7 @@ static inline void deprecated_attr_warn(const char *name) } #define ZRAM_ATTR_RO(name) \ -static ssize_t name##_show(struct device *d, \ +static ssize_t name##_show(struct device *d, \ struct device_attribute *attr, char *b) \ { \ struct zram *zram = dev_to_zram(d); \ -- cgit v1.1 From 85508ec6cbc21645927b6ac05e3b2748119a3e23 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:06 -0700 Subject: zram: use idr instead of `zram_devices' array This patch makes some preparations for on-demand device add/remove functionality. Remove `zram_devices' array and switch to id-to-pointer translation (idr). idr doesn't bloat zram struct with additional members, f.e. list_head, yet still provides ability to match the device_id with the device pointer. No user-space visible changes. [Julia.Lawall@lip6.fr: return -ENOMEM when `queue' alloc fails] Signed-off-by: Sergey Senozhatsky Reported-by: Julia Lawall Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 86 ++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 7814e68..addb18d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -28,12 +28,12 @@ #include #include #include +#include #include "zram_drv.h" -/* Globals */ +static DEFINE_IDR(zram_index_idr); static int zram_major; -static struct zram *zram_devices; static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ @@ -1154,10 +1154,20 @@ static struct attribute_group zram_disk_attr_group = { .attrs = zram_disk_attrs, }; -static int create_device(struct zram *zram, int device_id) +static int zram_add(int device_id) { + struct zram *zram; struct request_queue *queue; - int ret = -ENOMEM; + int ret; + + zram = kzalloc(sizeof(struct zram), GFP_KERNEL); + if (!zram) + return -ENOMEM; + + ret = idr_alloc(&zram_index_idr, zram, device_id, + device_id + 1, GFP_KERNEL); + if (ret < 0) + goto out_free_dev; init_rwsem(&zram->init_lock); @@ -1165,12 +1175,13 @@ static int create_device(struct zram *zram, int device_id) if (!queue) { pr_err("Error allocating disk queue for device %d\n", device_id); - goto out; + ret = -ENOMEM; + goto out_free_idr; } blk_queue_make_request(queue, zram_make_request); - /* gendisk structure */ + /* gendisk structure */ zram->disk = alloc_disk(1); if (!zram->disk) { pr_warn("Error allocating disk structure for device %d\n", @@ -1235,34 +1246,42 @@ out_free_disk: put_disk(zram->disk); out_free_queue: blk_cleanup_queue(queue); -out: +out_free_idr: + idr_remove(&zram_index_idr, device_id); +out_free_dev: + kfree(zram); return ret; } -static void destroy_devices(unsigned int nr) +static void zram_remove(struct zram *zram) { - struct zram *zram; - unsigned int i; - - for (i = 0; i < nr; i++) { - zram = &zram_devices[i]; - /* - * Remove sysfs first, so no one will perform a disksize - * store while we destroy the devices - */ - sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); + /* + * Remove sysfs first, so no one will perform a disksize + * store while we destroy the devices + */ + sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, + &zram_disk_attr_group); - zram_reset_device(zram); + zram_reset_device(zram); + idr_remove(&zram_index_idr, zram->disk->first_minor); + blk_cleanup_queue(zram->disk->queue); + del_gendisk(zram->disk); + put_disk(zram->disk); + kfree(zram); +} - blk_cleanup_queue(zram->disk->queue); - del_gendisk(zram->disk); - put_disk(zram->disk); - } +static int zram_remove_cb(int id, void *ptr, void *data) +{ + zram_remove(ptr); + return 0; +} - kfree(zram_devices); +static void destroy_devices(void) +{ + idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); + idr_destroy(&zram_index_idr); unregister_blkdev(zram_major, "zram"); - pr_info("Destroyed %u device(s)\n", nr); + pr_info("Destroyed device(s)\n"); } static int __init zram_init(void) @@ -1281,16 +1300,9 @@ static int __init zram_init(void) return -EBUSY; } - /* Allocate the device array and initialize each one */ - zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL); - if (!zram_devices) { - unregister_blkdev(zram_major, "zram"); - return -ENOMEM; - } - for (dev_id = 0; dev_id < num_devices; dev_id++) { - ret = create_device(&zram_devices[dev_id], dev_id); - if (ret) + ret = zram_add(dev_id); + if (ret != 0) goto out_error; } @@ -1298,13 +1310,13 @@ static int __init zram_init(void) return 0; out_error: - destroy_devices(dev_id); + destroy_devices(); return ret; } static void __exit zram_exit(void) { - destroy_devices(num_devices); + destroy_devices(); } module_init(zram_init); -- cgit v1.1 From 522698d7cadb5d208429c934f673713b7a42e925 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:08 -0700 Subject: zram: reorganize code layout This patch looks big, but basically it just moves code blocks. No functional changes. Our current code layout looks like a sandwitch. For example, a) between read/write handlers, we have update_used_max() helper function: static int zram_decompress_page static int zram_bvec_read static inline void update_used_max static int zram_bvec_write static int zram_bvec_rw b) RW request handlers __zram_make_request/zram_bio_discard are divided by sysfs attr reset_store() function and corresponding zram_reset_device() handler: static void zram_bio_discard static void zram_reset_device static ssize_t disksize_store static ssize_t reset_store static void __zram_make_request c) we first a bunch of sysfs read/store functions. then a number of one-liners, then helper functions, RW functions, sysfs functions, helper functions again, and so on. Reorganize layout to be more logically grouped (a brief description, `cat zram_drv.c | grep static` gives a bigger picture): -- one-liners: zram_test_flag/etc. -- helpers: is_partial_io/update_position/etc -- sysfs attr show/store functions + ZRAM_ATTR_RO() generated stats show() functions exception: reset and disksize store functions are required to be after meta() functions. because we do device create/destroy actions in these sysfs handlers. -- "mm" functions: meta get/put, meta alloc/free, page free static inline bool zram_meta_get static inline void zram_meta_put static void zram_meta_free static struct zram_meta *zram_meta_alloc static void zram_free_page -- a block of I/O functions static int zram_decompress_page static int zram_bvec_read static int zram_bvec_write static void zram_bio_discard static int zram_bvec_rw static void __zram_make_request static void zram_make_request static void zram_slot_free_notify static int zram_rw_page -- device contol: add/remove/init/reset functions (+zram-control class will sit here) static int zram_reset_device static ssize_t reset_store static ssize_t disksize_store static int zram_add static void zram_remove static int __init zram_init static void __exit zram_exit Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 737 +++++++++++++++++++++--------------------- 1 file changed, 368 insertions(+), 369 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index addb18d..82bc2ff 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -70,33 +70,117 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } -static ssize_t compact_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +/* flag operations needs meta->tb_lock */ +static int zram_test_flag(struct zram_meta *meta, u32 index, + enum zram_pageflags flag) { - unsigned long nr_migrated; - struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta; + return meta->table[index].value & BIT(flag); +} - down_read(&zram->init_lock); - if (!init_done(zram)) { - up_read(&zram->init_lock); - return -EINVAL; - } +static void zram_set_flag(struct zram_meta *meta, u32 index, + enum zram_pageflags flag) +{ + meta->table[index].value |= BIT(flag); +} - meta = zram->meta; - nr_migrated = zs_compact(meta->mem_pool); - atomic64_add(nr_migrated, &zram->stats.num_migrated); - up_read(&zram->init_lock); +static void zram_clear_flag(struct zram_meta *meta, u32 index, + enum zram_pageflags flag) +{ + meta->table[index].value &= ~BIT(flag); +} - return len; +static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) +{ + return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); } -static ssize_t disksize_show(struct device *dev, - struct device_attribute *attr, char *buf) +static void zram_set_obj_size(struct zram_meta *meta, + u32 index, size_t size) { - struct zram *zram = dev_to_zram(dev); + unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; - return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); + meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; +} + +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + +/* + * Check if request is within bounds and aligned on zram logical blocks. + */ +static inline int valid_io_request(struct zram *zram, + sector_t start, unsigned int size) +{ + u64 end, bound; + + /* unaligned request */ + if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) + return 0; + if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) + return 0; + + end = start + (size >> SECTOR_SHIFT); + bound = zram->disksize >> SECTOR_SHIFT; + /* out of range range */ + if (unlikely(start >= bound || end > bound || start > end)) + return 0; + + /* I/O request is valid */ + return 1; +} + +static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +{ + if (*offset + bvec->bv_len >= PAGE_SIZE) + (*index)++; + *offset = (*offset + bvec->bv_len) % PAGE_SIZE; +} + +static inline void update_used_max(struct zram *zram, + const unsigned long pages) +{ + unsigned long old_max, cur_max; + + old_max = atomic_long_read(&zram->stats.max_used_pages); + + do { + cur_max = old_max; + if (pages > cur_max) + old_max = atomic_long_cmpxchg( + &zram->stats.max_used_pages, cur_max, pages); + } while (old_max != cur_max); +} + +static int page_zero_filled(void *ptr) +{ + unsigned int pos; + unsigned long *page; + + page = (unsigned long *)ptr; + + for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { + if (page[pos]) + return 0; + } + + return 1; +} + +static void handle_zero_page(struct bio_vec *bvec) +{ + struct page *page = bvec->bv_page; + void *user_mem; + + user_mem = kmap_atomic(page); + if (is_partial_io(bvec)) + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + else + clear_page(user_mem); + kunmap_atomic(user_mem); + + flush_dcache_page(page); } static ssize_t initstate_show(struct device *dev, @@ -112,6 +196,14 @@ static ssize_t initstate_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%u\n", val); } +static ssize_t disksize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); +} + static ssize_t orig_data_size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -139,19 +231,6 @@ static ssize_t mem_used_total_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); } -static ssize_t max_comp_streams_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - int val; - struct zram *zram = dev_to_zram(dev); - - down_read(&zram->init_lock); - val = zram->max_comp_streams; - up_read(&zram->init_lock); - - return scnprintf(buf, PAGE_SIZE, "%d\n", val); -} - static ssize_t mem_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -221,6 +300,19 @@ static ssize_t mem_used_max_store(struct device *dev, return len; } +static ssize_t max_comp_streams_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->max_comp_streams; + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%d\n", val); +} + static ssize_t max_comp_streams_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -278,71 +370,101 @@ static ssize_t comp_algorithm_store(struct device *dev, return len; } -/* flag operations needs meta->tb_lock */ -static int zram_test_flag(struct zram_meta *meta, u32 index, - enum zram_pageflags flag) +static ssize_t compact_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { - return meta->table[index].value & BIT(flag); -} + unsigned long nr_migrated; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta; -static void zram_set_flag(struct zram_meta *meta, u32 index, - enum zram_pageflags flag) -{ - meta->table[index].value |= BIT(flag); -} + down_read(&zram->init_lock); + if (!init_done(zram)) { + up_read(&zram->init_lock); + return -EINVAL; + } -static void zram_clear_flag(struct zram_meta *meta, u32 index, - enum zram_pageflags flag) -{ - meta->table[index].value &= ~BIT(flag); -} + meta = zram->meta; + nr_migrated = zs_compact(meta->mem_pool); + atomic64_add(nr_migrated, &zram->stats.num_migrated); + up_read(&zram->init_lock); -static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) -{ - return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); + return len; } -static void zram_set_obj_size(struct zram_meta *meta, - u32 index, size_t size) +static ssize_t io_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) { - unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; + struct zram *zram = dev_to_zram(dev); + ssize_t ret; - meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; -} + down_read(&zram->init_lock); + ret = scnprintf(buf, PAGE_SIZE, + "%8llu %8llu %8llu %8llu\n", + (u64)atomic64_read(&zram->stats.failed_reads), + (u64)atomic64_read(&zram->stats.failed_writes), + (u64)atomic64_read(&zram->stats.invalid_io), + (u64)atomic64_read(&zram->stats.notify_free)); + up_read(&zram->init_lock); -static inline int is_partial_io(struct bio_vec *bvec) -{ - return bvec->bv_len != PAGE_SIZE; + return ret; } -/* - * Check if request is within bounds and aligned on zram logical blocks. - */ -static inline int valid_io_request(struct zram *zram, - sector_t start, unsigned int size) +static ssize_t mm_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) { - u64 end, bound; + struct zram *zram = dev_to_zram(dev); + u64 orig_size, mem_used = 0; + long max_used; + ssize_t ret; - /* unaligned request */ - if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) - return 0; - if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) - return 0; + down_read(&zram->init_lock); + if (init_done(zram)) + mem_used = zs_get_total_pages(zram->meta->mem_pool); - end = start + (size >> SECTOR_SHIFT); - bound = zram->disksize >> SECTOR_SHIFT; - /* out of range range */ - if (unlikely(start >= bound || end > bound || start > end)) - return 0; + orig_size = atomic64_read(&zram->stats.pages_stored); + max_used = atomic_long_read(&zram->stats.max_used_pages); - /* I/O request is valid */ - return 1; -} + ret = scnprintf(buf, PAGE_SIZE, + "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n", + orig_size << PAGE_SHIFT, + (u64)atomic64_read(&zram->stats.compr_data_size), + mem_used << PAGE_SHIFT, + zram->limit_pages << PAGE_SHIFT, + max_used << PAGE_SHIFT, + (u64)atomic64_read(&zram->stats.zero_pages), + (u64)atomic64_read(&zram->stats.num_migrated)); + up_read(&zram->init_lock); -static void zram_meta_free(struct zram_meta *meta, u64 disksize) -{ - size_t num_pages = disksize >> PAGE_SHIFT; - size_t index; + return ret; +} + +static DEVICE_ATTR_RO(io_stat); +static DEVICE_ATTR_RO(mm_stat); +ZRAM_ATTR_RO(num_reads); +ZRAM_ATTR_RO(num_writes); +ZRAM_ATTR_RO(failed_reads); +ZRAM_ATTR_RO(failed_writes); +ZRAM_ATTR_RO(invalid_io); +ZRAM_ATTR_RO(notify_free); +ZRAM_ATTR_RO(zero_pages); +ZRAM_ATTR_RO(compr_data_size); + +static inline bool zram_meta_get(struct zram *zram) +{ + if (atomic_inc_not_zero(&zram->refcount)) + return true; + return false; +} + +static inline void zram_meta_put(struct zram *zram) +{ + atomic_dec(&zram->refcount); +} + +static void zram_meta_free(struct zram_meta *meta, u64 disksize) +{ + size_t num_pages = disksize >> PAGE_SHIFT; + size_t index; /* Free all pages that are still in this zram device */ for (index = 0; index < num_pages; index++) { @@ -390,56 +512,6 @@ out_error: return NULL; } -static inline bool zram_meta_get(struct zram *zram) -{ - if (atomic_inc_not_zero(&zram->refcount)) - return true; - return false; -} - -static inline void zram_meta_put(struct zram *zram) -{ - atomic_dec(&zram->refcount); -} - -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) -{ - if (*offset + bvec->bv_len >= PAGE_SIZE) - (*index)++; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; -} - -static int page_zero_filled(void *ptr) -{ - unsigned int pos; - unsigned long *page; - - page = (unsigned long *)ptr; - - for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { - if (page[pos]) - return 0; - } - - return 1; -} - -static void handle_zero_page(struct bio_vec *bvec) -{ - struct page *page = bvec->bv_page; - void *user_mem; - - user_mem = kmap_atomic(page); - if (is_partial_io(bvec)) - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); - else - clear_page(user_mem); - kunmap_atomic(user_mem); - - flush_dcache_page(page); -} - - /* * To protect concurrent access to the same index entry, * caller should hold this table index entry's bit_spinlock to @@ -557,21 +629,6 @@ out_cleanup: return ret; } -static inline void update_used_max(struct zram *zram, - const unsigned long pages) -{ - unsigned long old_max, cur_max; - - old_max = atomic_long_read(&zram->stats.max_used_pages); - - do { - cur_max = old_max; - if (pages > cur_max) - old_max = atomic_long_cmpxchg( - &zram->stats.max_used_pages, cur_max, pages); - } while (old_max != cur_max); -} - static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset) { @@ -699,35 +756,6 @@ out: return ret; } -static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, int rw) -{ - unsigned long start_time = jiffies; - int ret; - - generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, - &zram->disk->part0); - - if (rw == READ) { - atomic64_inc(&zram->stats.num_reads); - ret = zram_bvec_read(zram, bvec, index, offset); - } else { - atomic64_inc(&zram->stats.num_writes); - ret = zram_bvec_write(zram, bvec, index, offset); - } - - generic_end_io_acct(rw, &zram->disk->part0, start_time); - - if (unlikely(ret)) { - if (rw == READ) - atomic64_inc(&zram->stats.failed_reads); - else - atomic64_inc(&zram->stats.failed_writes); - } - - return ret; -} - /* * zram_bio_discard - handler on discard request * @index: physical block index in PAGE_SIZE units @@ -767,151 +795,32 @@ static void zram_bio_discard(struct zram *zram, u32 index, } } -static void zram_reset_device(struct zram *zram) -{ - struct zram_meta *meta; - struct zcomp *comp; - u64 disksize; - - down_write(&zram->init_lock); - - zram->limit_pages = 0; - - if (!init_done(zram)) { - up_write(&zram->init_lock); - return; - } - - meta = zram->meta; - comp = zram->comp; - disksize = zram->disksize; - /* - * Refcount will go down to 0 eventually and r/w handler - * cannot handle further I/O so it will bail out by - * check zram_meta_get. - */ - zram_meta_put(zram); - /* - * We want to free zram_meta in process context to avoid - * deadlock between reclaim path and any other locks. - */ - wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); - - /* Reset stats */ - memset(&zram->stats, 0, sizeof(zram->stats)); - zram->disksize = 0; - zram->max_comp_streams = 1; - - set_capacity(zram->disk, 0); - part_stat_set_all(&zram->disk->part0, 0); - - up_write(&zram->init_lock); - /* I/O operation under all of CPU are done so let's free */ - zram_meta_free(meta, disksize); - zcomp_destroy(comp); -} - -static ssize_t disksize_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - u64 disksize; - struct zcomp *comp; - struct zram_meta *meta; - struct zram *zram = dev_to_zram(dev); - int err; - - disksize = memparse(buf, NULL); - if (!disksize) - return -EINVAL; - - disksize = PAGE_ALIGN(disksize); - meta = zram_meta_alloc(zram->disk->first_minor, disksize); - if (!meta) - return -ENOMEM; - - comp = zcomp_create(zram->compressor, zram->max_comp_streams); - if (IS_ERR(comp)) { - pr_info("Cannot initialise %s compressing backend\n", - zram->compressor); - err = PTR_ERR(comp); - goto out_free_meta; - } - - down_write(&zram->init_lock); - if (init_done(zram)) { - pr_info("Cannot change disksize for initialized device\n"); - err = -EBUSY; - goto out_destroy_comp; - } - - init_waitqueue_head(&zram->io_done); - atomic_set(&zram->refcount, 1); - zram->meta = meta; - zram->comp = comp; - zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - up_write(&zram->init_lock); - - /* - * Revalidate disk out of the init_lock to avoid lockdep splat. - * It's okay because disk's capacity is protected by init_lock - * so that revalidate_disk always sees up-to-date capacity. - */ - revalidate_disk(zram->disk); - - return len; - -out_destroy_comp: - up_write(&zram->init_lock); - zcomp_destroy(comp); -out_free_meta: - zram_meta_free(meta, disksize); - return err; -} - -static ssize_t reset_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) +static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, + int offset, int rw) { + unsigned long start_time = jiffies; int ret; - unsigned short do_reset; - struct zram *zram; - struct block_device *bdev; - zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - - if (!bdev) - return -ENOMEM; + generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, + &zram->disk->part0); - mutex_lock(&bdev->bd_mutex); - /* Do not reset an active device! */ - if (bdev->bd_openers) { - ret = -EBUSY; - goto out; + if (rw == READ) { + atomic64_inc(&zram->stats.num_reads); + ret = zram_bvec_read(zram, bvec, index, offset); + } else { + atomic64_inc(&zram->stats.num_writes); + ret = zram_bvec_write(zram, bvec, index, offset); } - ret = kstrtou16(buf, 10, &do_reset); - if (ret) - goto out; + generic_end_io_acct(rw, &zram->disk->part0, start_time); - if (!do_reset) { - ret = -EINVAL; - goto out; + if (unlikely(ret)) { + if (rw == READ) + atomic64_inc(&zram->stats.failed_reads); + else + atomic64_inc(&zram->stats.failed_writes); } - /* Make sure all pending I/O is finished */ - fsync_bdev(bdev); - zram_reset_device(zram); - - mutex_unlock(&bdev->bd_mutex); - revalidate_disk(zram->disk); - bdput(bdev); - - return len; - -out: - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return ret; } @@ -1051,80 +960,170 @@ out: return err; } -static const struct block_device_operations zram_devops = { - .swap_slot_free_notify = zram_slot_free_notify, - .rw_page = zram_rw_page, - .owner = THIS_MODULE -}; +static void zram_reset_device(struct zram *zram) +{ + struct zram_meta *meta; + struct zcomp *comp; + u64 disksize; -static DEVICE_ATTR_WO(compact); -static DEVICE_ATTR_RW(disksize); -static DEVICE_ATTR_RO(initstate); -static DEVICE_ATTR_WO(reset); -static DEVICE_ATTR_RO(orig_data_size); -static DEVICE_ATTR_RO(mem_used_total); -static DEVICE_ATTR_RW(mem_limit); -static DEVICE_ATTR_RW(mem_used_max); -static DEVICE_ATTR_RW(max_comp_streams); -static DEVICE_ATTR_RW(comp_algorithm); + down_write(&zram->init_lock); -static ssize_t io_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) + zram->limit_pages = 0; + + if (!init_done(zram)) { + up_write(&zram->init_lock); + return; + } + + meta = zram->meta; + comp = zram->comp; + disksize = zram->disksize; + /* + * Refcount will go down to 0 eventually and r/w handler + * cannot handle further I/O so it will bail out by + * check zram_meta_get. + */ + zram_meta_put(zram); + /* + * We want to free zram_meta in process context to avoid + * deadlock between reclaim path and any other locks. + */ + wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); + + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + zram->disksize = 0; + zram->max_comp_streams = 1; + + set_capacity(zram->disk, 0); + part_stat_set_all(&zram->disk->part0, 0); + + up_write(&zram->init_lock); + /* I/O operation under all of CPU are done so let's free */ + zram_meta_free(meta, disksize); + zcomp_destroy(comp); +} + +static ssize_t disksize_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { + u64 disksize; + struct zcomp *comp; + struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); - ssize_t ret; + int err; - down_read(&zram->init_lock); - ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8llu\n", - (u64)atomic64_read(&zram->stats.failed_reads), - (u64)atomic64_read(&zram->stats.failed_writes), - (u64)atomic64_read(&zram->stats.invalid_io), - (u64)atomic64_read(&zram->stats.notify_free)); - up_read(&zram->init_lock); + disksize = memparse(buf, NULL); + if (!disksize) + return -EINVAL; - return ret; + disksize = PAGE_ALIGN(disksize); + meta = zram_meta_alloc(zram->disk->first_minor, disksize); + if (!meta) + return -ENOMEM; + + comp = zcomp_create(zram->compressor, zram->max_comp_streams); + if (IS_ERR(comp)) { + pr_info("Cannot initialise %s compressing backend\n", + zram->compressor); + err = PTR_ERR(comp); + goto out_free_meta; + } + + down_write(&zram->init_lock); + if (init_done(zram)) { + pr_info("Cannot change disksize for initialized device\n"); + err = -EBUSY; + goto out_destroy_comp; + } + + init_waitqueue_head(&zram->io_done); + atomic_set(&zram->refcount, 1); + zram->meta = meta; + zram->comp = comp; + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + up_write(&zram->init_lock); + + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock + * so that revalidate_disk always sees up-to-date capacity. + */ + revalidate_disk(zram->disk); + + return len; + +out_destroy_comp: + up_write(&zram->init_lock); + zcomp_destroy(comp); +out_free_meta: + zram_meta_free(meta, disksize); + return err; } -static ssize_t mm_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) { - struct zram *zram = dev_to_zram(dev); - u64 orig_size, mem_used = 0; - long max_used; - ssize_t ret; + int ret; + unsigned short do_reset; + struct zram *zram; + struct block_device *bdev; - down_read(&zram->init_lock); - if (init_done(zram)) - mem_used = zs_get_total_pages(zram->meta->mem_pool); + zram = dev_to_zram(dev); + bdev = bdget_disk(zram->disk, 0); - orig_size = atomic64_read(&zram->stats.pages_stored); - max_used = atomic_long_read(&zram->stats.max_used_pages); + if (!bdev) + return -ENOMEM; - ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n", - orig_size << PAGE_SHIFT, - (u64)atomic64_read(&zram->stats.compr_data_size), - mem_used << PAGE_SHIFT, - zram->limit_pages << PAGE_SHIFT, - max_used << PAGE_SHIFT, - (u64)atomic64_read(&zram->stats.zero_pages), - (u64)atomic64_read(&zram->stats.num_migrated)); - up_read(&zram->init_lock); + mutex_lock(&bdev->bd_mutex); + /* Do not reset an active device! */ + if (bdev->bd_openers) { + ret = -EBUSY; + goto out; + } + + ret = kstrtou16(buf, 10, &do_reset); + if (ret) + goto out; + + if (!do_reset) { + ret = -EINVAL; + goto out; + } + + /* Make sure all pending I/O is finished */ + fsync_bdev(bdev); + zram_reset_device(zram); + + mutex_unlock(&bdev->bd_mutex); + revalidate_disk(zram->disk); + bdput(bdev); + + return len; +out: + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); return ret; } -static DEVICE_ATTR_RO(io_stat); -static DEVICE_ATTR_RO(mm_stat); -ZRAM_ATTR_RO(num_reads); -ZRAM_ATTR_RO(num_writes); -ZRAM_ATTR_RO(failed_reads); -ZRAM_ATTR_RO(failed_writes); -ZRAM_ATTR_RO(invalid_io); -ZRAM_ATTR_RO(notify_free); -ZRAM_ATTR_RO(zero_pages); -ZRAM_ATTR_RO(compr_data_size); +static const struct block_device_operations zram_devops = { + .swap_slot_free_notify = zram_slot_free_notify, + .rw_page = zram_rw_page, + .owner = THIS_MODULE +}; + +static DEVICE_ATTR_WO(compact); +static DEVICE_ATTR_RW(disksize); +static DEVICE_ATTR_RO(initstate); +static DEVICE_ATTR_WO(reset); +static DEVICE_ATTR_RO(orig_data_size); +static DEVICE_ATTR_RO(mem_used_total); +static DEVICE_ATTR_RW(mem_limit); +static DEVICE_ATTR_RW(mem_used_max); +static DEVICE_ATTR_RW(max_comp_streams); +static DEVICE_ATTR_RW(comp_algorithm); static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, -- cgit v1.1 From c3cdb40e66344553898daa3ccd068c03173a3f42 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:11 -0700 Subject: zram: remove max_num_devices limitation Limiting the number of zram devices to 32 (default max_num_devices value) is confusing, let's drop it. A user with 2TB or 4TB of RAM, for example, can request as many devices as he can handle. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/blockdev/zram.txt | 4 +++- drivers/block/zram/zram_drv.c | 8 +------- drivers/block/zram/zram_drv.h | 6 ------ 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index bef4998..65e9430 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -19,7 +19,9 @@ Following shows a typical sequence of steps for using zram. 1) Load Module: modprobe zram num_devices=4 This creates 4 devices: /dev/zram{0,1,2,3} - (num_devices parameter is optional. Default: 1) + +num_devices parameter is optional and tells zram how many devices should be +pre-created. Default: 1. 2) Set max number of compression streams Compression backend may use up to max_comp_streams compression streams, diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 82bc2ff..70aac2e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1287,12 +1287,6 @@ static int __init zram_init(void) { int ret, dev_id; - if (num_devices > max_num_devices) { - pr_warn("Invalid value for num_devices: %u\n", - num_devices); - return -EINVAL; - } - zram_major = register_blkdev(0, "zram"); if (zram_major <= 0) { pr_warn("Unable to get major number\n"); @@ -1322,7 +1316,7 @@ module_init(zram_init); module_exit(zram_exit); module_param(num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); +MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 570c598..042994e 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -20,12 +20,6 @@ #include "zcomp.h" -/* - * Some arbitrary value. This is just to catch - * invalid value for num_devices module parameter. - */ -static const unsigned max_num_devices = 32; - /*-- Configurable parameters */ /* -- cgit v1.1 From d12b63c927e0e90de4b55d5084f947707c6d8cf4 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:14 -0700 Subject: zram: report every added and removed device With dynamic device creation/removal (which will be introduced later in the series) printing num_devices in zram_init() will not make a lot of sense, as well as printing the number of destroyed devices in destroy_devices(). Print per-device action (added/removed) in zram_add() and zram_remove() instead. Example: [ 3645.259652] zram: Added device: zram5 [ 3646.152074] zram: Added device: zram6 [ 3650.585012] zram: Removed device: zram5 [ 3655.845584] zram: Added device: zram8 [ 3660.975223] zram: Removed device: zram6 Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 70aac2e..4429f54 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1238,6 +1238,8 @@ static int zram_add(int device_id) strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); zram->meta = NULL; zram->max_comp_streams = 1; + + pr_info("Added device: %s\n", zram->disk->disk_name); return 0; out_free_disk: @@ -1254,6 +1256,7 @@ out_free_dev: static void zram_remove(struct zram *zram) { + pr_info("Removed device: %s\n", zram->disk->disk_name); /* * Remove sysfs first, so no one will perform a disksize * store while we destroy the devices @@ -1280,7 +1283,6 @@ static void destroy_devices(void) idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); idr_destroy(&zram_index_idr); unregister_blkdev(zram_major, "zram"); - pr_info("Destroyed device(s)\n"); } static int __init zram_init(void) @@ -1299,7 +1301,6 @@ static int __init zram_init(void) goto out_error; } - pr_info("Created %u device(s)\n", num_devices); return 0; out_error: -- cgit v1.1 From b31177f2a9d5b2cfb1da7a06a4a98273b40975a8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:16 -0700 Subject: zram: trivial: correct flag operations comment We don't have meta->tb_lock anymore and use meta table entry bit_spin_lock instead. update corresponding comment. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4429f54..4f76cf3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -70,7 +70,7 @@ static inline struct zram *dev_to_zram(struct device *dev) return (struct zram *)dev_to_disk(dev)->private_data; } -/* flag operations needs meta->tb_lock */ +/* flag operations require table entry bit_spin_lock() being held */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { -- cgit v1.1 From 92ff15288747b80730f0132e9c98403370c27b34 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:19 -0700 Subject: zram: return zram device_id from zram_add() This patch prepares zram to enable on-demand device creation. zram_add() performs automatic device_id assignment and returns new device id (>= 0) or error code (< 0). Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4f76cf3..a52a819 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1153,20 +1153,24 @@ static struct attribute_group zram_disk_attr_group = { .attrs = zram_disk_attrs, }; -static int zram_add(int device_id) +/* + * Allocate and initialize new zram device. the function returns + * '>= 0' device_id upon success, and negative value otherwise. + */ +static int zram_add(void) { struct zram *zram; struct request_queue *queue; - int ret; + int ret, device_id; zram = kzalloc(sizeof(struct zram), GFP_KERNEL); if (!zram) return -ENOMEM; - ret = idr_alloc(&zram_index_idr, zram, device_id, - device_id + 1, GFP_KERNEL); + ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); if (ret < 0) goto out_free_dev; + device_id = ret; init_rwsem(&zram->init_lock); @@ -1240,7 +1244,7 @@ static int zram_add(int device_id) zram->max_comp_streams = 1; pr_info("Added device: %s\n", zram->disk->disk_name); - return 0; + return device_id; out_free_disk: del_gendisk(zram->disk); @@ -1287,7 +1291,7 @@ static void destroy_devices(void) static int __init zram_init(void) { - int ret, dev_id; + int ret; zram_major = register_blkdev(0, "zram"); if (zram_major <= 0) { @@ -1295,10 +1299,11 @@ static int __init zram_init(void) return -EBUSY; } - for (dev_id = 0; dev_id < num_devices; dev_id++) { - ret = zram_add(dev_id); - if (ret != 0) + while (num_devices != 0) { + ret = zram_add(); + if (ret < 0) goto out_error; + num_devices--; } return 0; -- cgit v1.1 From f405c445a4866caa43101c231721123805a23bbf Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:21 -0700 Subject: zram: close race by open overriding [ Original patch from Minchan Kim ] Commit ba6b17d68c8e ("zram: fix umount-reset_store-mount race condition") introduced bdev->bd_mutex to protect a race between mount and reset. At that time, we don't have dynamic zram-add/remove feature so it was okay. However, as we introduce dynamic device feature, bd_mutex became trouble. CPU 0 echo 1 > /sys/block/zram/reset -> kernfs->s_active(A) -> zram:reset_store->bd_mutex(B) CPU 1 echo > /sys/class/zram/zram-remove ->zram:zram_remove: bd_mutex(B) -> sysfs_remove_group -> kernfs->s_active(A) IOW, AB -> BA deadlock The reason we are holding bd_mutex for zram_remove is to prevent any incoming open /dev/zram[0-9]. Otherwise, we could remove zram others already have opened. But it causes above deadlock problem. To fix the problem, this patch overrides block_device.open and it returns -EBUSY if zram asserts he claims zram to reset so any incoming open will be failed so we don't need to hold bd_mutex for zram_remove ayn more. This patch is to prepare for zram-add/remove feature. [sergey.senozhatsky@gmail.com: simplify reset_store()] Signed-off-by: Minchan Kim Acked-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 53 +++++++++++++++++++++++++++---------------- drivers/block/zram/zram_drv.h | 4 ++++ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a52a819..6a3b36d5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1070,45 +1070,60 @@ static ssize_t reset_store(struct device *dev, struct zram *zram; struct block_device *bdev; + ret = kstrtou16(buf, 10, &do_reset); + if (ret) + return ret; + + if (!do_reset) + return -EINVAL; + zram = dev_to_zram(dev); bdev = bdget_disk(zram->disk, 0); - if (!bdev) return -ENOMEM; mutex_lock(&bdev->bd_mutex); - /* Do not reset an active device! */ - if (bdev->bd_openers) { - ret = -EBUSY; - goto out; + /* Do not reset an active device or claimed device */ + if (bdev->bd_openers || zram->claim) { + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + return -EBUSY; } - ret = kstrtou16(buf, 10, &do_reset); - if (ret) - goto out; - - if (!do_reset) { - ret = -EINVAL; - goto out; - } + /* From now on, anyone can't open /dev/zram[0-9] */ + zram->claim = true; + mutex_unlock(&bdev->bd_mutex); - /* Make sure all pending I/O is finished */ + /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - - mutex_unlock(&bdev->bd_mutex); revalidate_disk(zram->disk); bdput(bdev); + mutex_lock(&bdev->bd_mutex); + zram->claim = false; + mutex_unlock(&bdev->bd_mutex); + return len; +} + +static int zram_open(struct block_device *bdev, fmode_t mode) +{ + int ret = 0; + struct zram *zram; + + WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); + + zram = bdev->bd_disk->private_data; + /* zram was claimed to reset so open request fails */ + if (zram->claim) + ret = -EBUSY; -out: - mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return ret; } static const struct block_device_operations zram_devops = { + .open = zram_open, .swap_slot_free_notify = zram_slot_free_notify, .rw_page = zram_rw_page, .owner = THIS_MODULE diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 042994e..6dbe2df 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -115,5 +115,9 @@ struct zram { */ u64 disksize; /* bytes */ char compressor[10]; + /* + * zram is claimed so open request will be failed + */ + bool claim; /* Protected by bdev->bd_mutex */ }; #endif -- cgit v1.1 From 6566d1a32bf725a4fa9119f16270505451ad01ac Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:24 -0700 Subject: zram: add dynamic device add/remove functionality We currently don't support on-demand device creation. The one and only way to have N zram devices is to specify num_devices module parameter (default value: 1). IOW if, for some reason, at some point, user wants to have N + 1 devies he/she must umount all the existing devices, unload the module, load the module passing num_devices equals to N + 1. And do this again, if needed. This patch introduces zram control sysfs class, which has two sysfs attrs: - hot_add -- add a new zram device - hot_remove -- remove a specific (device_id) zram device hot_add sysfs attr is read-only and has only automatic device id assignment mode (as requested by Minchan Kim). read operation performed on this attr creates a new zram device and returns back its device_id or error status. Usage example: # add a new specific zram device cat /sys/class/zram-control/hot_add 2 # remove a specific zram device echo 4 > /sys/class/zram-control/hot_remove Returning zram_add() error code back to user (-ENOMEM in this case) cat /sys/class/zram-control/hot_add cat: /sys/class/zram-control/hot_add: Cannot allocate memory NOTE, there might be users who already depend on the fact that at least zram0 device gets always created by zram_init(). Preserve this behavior. [minchan@kernel.org: use zram->claim to avoid lockdep splat] Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/sysfs-class-zram | 24 +++++++ Documentation/blockdev/zram.txt | 23 ++++++- drivers/block/zram/zram_drv.c | 100 ++++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 6 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-zram diff --git a/Documentation/ABI/testing/sysfs-class-zram b/Documentation/ABI/testing/sysfs-class-zram new file mode 100644 index 0000000..48ddacb --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-zram @@ -0,0 +1,24 @@ +What: /sys/class/zram-control/ +Date: August 2015 +KernelVersion: 4.2 +Contact: Sergey Senozhatsky +Description: + The zram-control/ class sub-directory belongs to zram + device class + +What: /sys/class/zram-control/hot_add +Date: August 2015 +KernelVersion: 4.2 +Contact: Sergey Senozhatsky +Description: + RO attribute. Read operation will cause zram to add a new + device and return its device id back to user (so one can + use /dev/zram), or error code. + +What: /sys/class/zram-control/hot_remove +Date: August 2015 +KernelVersion: 4.2 +Contact: Sergey Senozhatsky +Description: + WO attribute. Remove a specific /dev/zramX device, where X + is a device_id provided by user. diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt index 65e9430..c4de576 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.txt @@ -99,7 +99,24 @@ size of the disk when not in use so a huge zram is wasteful. mkfs.ext4 /dev/zram1 mount /dev/zram1 /tmp -7) Stats: +7) Add/remove zram devices + +zram provides a control interface, which enables dynamic (on-demand) device +addition and removal. + +In order to add a new /dev/zramX device, perform read operation on hot_add +attribute. This will return either new device's device id (meaning that you +can use /dev/zram) or error code. + +Example: + cat /sys/class/zram-control/hot_add + 1 + +To remove the existing /dev/zramX device (where X is a device id) +execute + echo X > /sys/class/zram-control/hot_remove + +8) Stats: Per-device statistics are exported as various nodes under /sys/block/zram/ A brief description of exported device attritbutes. For more details please @@ -174,11 +191,11 @@ line of text and contains the following stats separated by whitespace: zero_pages num_migrated -8) Deactivate: +9) Deactivate: swapoff /dev/zram0 umount /dev/zram1 -9) Reset: +10) Reset: Write any positive value to 'reset' sysfs node echo 1 > /sys/block/zram0/reset echo 1 > /sys/block/zram1/reset diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6a3b36d5..830f0eb 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -29,10 +29,14 @@ #include #include #include +#include #include "zram_drv.h" static DEFINE_IDR(zram_index_idr); +/* idr index must be protected */ +static DEFINE_MUTEX(zram_index_mutex); + static int zram_major; static const char *default_compressor = "lzo"; @@ -1273,24 +1277,104 @@ out_free_dev: return ret; } -static void zram_remove(struct zram *zram) +static int zram_remove(struct zram *zram) { - pr_info("Removed device: %s\n", zram->disk->disk_name); + struct block_device *bdev; + + bdev = bdget_disk(zram->disk, 0); + if (!bdev) + return -ENOMEM; + + mutex_lock(&bdev->bd_mutex); + if (bdev->bd_openers || zram->claim) { + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + return -EBUSY; + } + + zram->claim = true; + mutex_unlock(&bdev->bd_mutex); + /* * Remove sysfs first, so no one will perform a disksize - * store while we destroy the devices + * store while we destroy the devices. This also helps during + * hot_remove -- zram_reset_device() is the last holder of + * ->init_lock, no later/concurrent disksize_store() or any + * other sysfs handlers are possible. */ sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, &zram_disk_attr_group); + /* Make sure all the pending I/O are finished */ + fsync_bdev(bdev); zram_reset_device(zram); + bdput(bdev); + + pr_info("Removed device: %s\n", zram->disk->disk_name); + idr_remove(&zram_index_idr, zram->disk->first_minor); blk_cleanup_queue(zram->disk->queue); del_gendisk(zram->disk); put_disk(zram->disk); kfree(zram); + return 0; +} + +/* zram-control sysfs attributes */ +static ssize_t hot_add_show(struct class *class, + struct class_attribute *attr, + char *buf) +{ + int ret; + + mutex_lock(&zram_index_mutex); + ret = zram_add(); + mutex_unlock(&zram_index_mutex); + + if (ret < 0) + return ret; + return scnprintf(buf, PAGE_SIZE, "%d\n", ret); +} + +static ssize_t hot_remove_store(struct class *class, + struct class_attribute *attr, + const char *buf, + size_t count) +{ + struct zram *zram; + int ret, dev_id; + + /* dev_id is gendisk->first_minor, which is `int' */ + ret = kstrtoint(buf, 10, &dev_id); + if (ret) + return ret; + if (dev_id < 0) + return -EINVAL; + + mutex_lock(&zram_index_mutex); + + zram = idr_find(&zram_index_idr, dev_id); + if (zram) + ret = zram_remove(zram); + else + ret = -ENODEV; + + mutex_unlock(&zram_index_mutex); + return ret ? ret : count; } +static struct class_attribute zram_control_class_attrs[] = { + __ATTR_RO(hot_add), + __ATTR_WO(hot_remove), + __ATTR_NULL, +}; + +static struct class zram_control_class = { + .name = "zram-control", + .owner = THIS_MODULE, + .class_attrs = zram_control_class_attrs, +}; + static int zram_remove_cb(int id, void *ptr, void *data) { zram_remove(ptr); @@ -1299,6 +1383,7 @@ static int zram_remove_cb(int id, void *ptr, void *data) static void destroy_devices(void) { + class_unregister(&zram_control_class); idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); idr_destroy(&zram_index_idr); unregister_blkdev(zram_major, "zram"); @@ -1308,14 +1393,23 @@ static int __init zram_init(void) { int ret; + ret = class_register(&zram_control_class); + if (ret) { + pr_warn("Unable to register zram-control class\n"); + return ret; + } + zram_major = register_blkdev(0, "zram"); if (zram_major <= 0) { pr_warn("Unable to get major number\n"); + class_unregister(&zram_control_class); return -EBUSY; } while (num_devices != 0) { + mutex_lock(&zram_index_mutex); ret = zram_add(); + mutex_unlock(&zram_index_mutex); if (ret < 0) goto out_error; num_devices--; -- cgit v1.1 From 17162f41f04a3ba5a6b887a8e8e7f78159fa4a70 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:27 -0700 Subject: zram: cosmetic zram_bvec_write() cleanup `bool locked' local variable tells us if we should perform zcomp_strm_release() or not (jumped to `out' label before zcomp_strm_find() occurred), which is equivalent to `zstrm' being or not being NULL. remove `locked' and check `zstrm' instead. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 830f0eb..a129d52 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -642,8 +642,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; struct zram_meta *meta = zram->meta; - struct zcomp_strm *zstrm; - bool locked = false; + struct zcomp_strm *zstrm = NULL; unsigned long alloced_pages; page = bvec->bv_page; @@ -663,7 +662,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } zstrm = zcomp_strm_find(zram->comp); - locked = true; user_mem = kmap_atomic(page); if (is_partial_io(bvec)) { @@ -735,7 +733,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } zcomp_strm_release(zram->comp, zstrm); - locked = false; + zstrm = NULL; zs_unmap_object(meta->mem_pool, handle); /* @@ -753,7 +751,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, atomic64_add(clen, &zram->stats.compr_data_size); atomic64_inc(&zram->stats.pages_stored); out: - if (locked) + if (zstrm) zcomp_strm_release(zram->comp, zstrm); if (is_partial_io(bvec)) kfree(uncmem); -- cgit v1.1 From 4bbacd51a683e45c13f7d6df6f85cb7391590311 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:29 -0700 Subject: zram: cut trailing newline in algorithm name Supplied sysfs values sometimes contain new-line symbols (echo vs. echo -n), which we also copy as a compression algorithm name. it works fine when we lookup for compression algorithm, because we use sysfs_streq() which takes care of new line symbols. however, it doesn't look nice when we print compression algorithm name if zcomp_create() failed: zram: Cannot initialise LXZ compressing backend cut trailing new-line, so the error string will look like zram: Cannot initialise LXZ compressing backend we also now can replace sysfs_streq() in zcomp_available_show() with strcmp(). Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zcomp.c | 2 +- drivers/block/zram/zram_drv.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index f1ff39a..a1a8b8e 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -274,7 +274,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf) int i = 0; while (backends[i]) { - if (sysfs_streq(comp, backends[i]->name)) + if (!strcmp(comp, backends[i]->name)) sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "[%s] ", backends[i]->name); else diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a129d52..d94696b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -363,6 +363,8 @@ static ssize_t comp_algorithm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); + size_t sz; + down_write(&zram->init_lock); if (init_done(zram)) { up_write(&zram->init_lock); @@ -370,6 +372,12 @@ static ssize_t comp_algorithm_store(struct device *dev, return -EBUSY; } strlcpy(zram->compressor, buf, sizeof(zram->compressor)); + + /* ignore trailing newline */ + sz = strlen(zram->compressor); + if (sz > 0 && zram->compressor[sz - 1] == '\n') + zram->compressor[sz - 1] = 0x00; + up_write(&zram->init_lock); return len; } -- cgit v1.1 From d93435c3fba4a47b773693b0c8992470d38510d5 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Jun 2015 15:00:32 -0700 Subject: zram: check comp algorithm availability earlier Improvement idea by Marcin Jabrzyk. comp_algorithm_store() silently accepts any supplied algorithm name, because zram performs algorithm availability check later, during the device configuration phase in disksize_store() and emits the following error: "zram: Cannot initialise %s compressing backend" this error line is somewhat generic and, besides, can indicate a failed attempt to allocate compression backend's working buffers. add algorithm availability check to comp_algorithm_store(): echo lzz > /sys/block/zram0/comp_algorithm -bash: echo: write error: Invalid argument Signed-off-by: Sergey Senozhatsky Reported-by: Marcin Jabrzyk Acked-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zcomp.c | 5 +++++ drivers/block/zram/zcomp.h | 1 + drivers/block/zram/zram_drv.c | 3 +++ 3 files changed, 9 insertions(+) diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index a1a8b8e..965d1af 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -286,6 +286,11 @@ ssize_t zcomp_available_show(const char *comp, char *buf) return sz; } +bool zcomp_available_algorithm(const char *comp) +{ + return find_backend(comp) != NULL; +} + bool zcomp_set_max_streams(struct zcomp *comp, int num_strm) { return comp->set_max_streams(comp, num_strm); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index c59d1fc..46e2b9f 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -51,6 +51,7 @@ struct zcomp { }; ssize_t zcomp_available_show(const char *comp, char *buf); +bool zcomp_available_algorithm(const char *comp); struct zcomp *zcomp_create(const char *comp, int max_strm); void zcomp_destroy(struct zcomp *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d94696b..fb655e8 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -378,6 +378,9 @@ static ssize_t comp_algorithm_store(struct device *dev, if (sz > 0 && zram->compressor[sz - 1] == '\n') zram->compressor[sz - 1] = 0x00; + if (!zcomp_available_algorithm(zram->compressor)) + len = -EINVAL; + up_write(&zram->init_lock); return len; } -- cgit v1.1 From c00ed16a9eb98a7fc076e227bdd95c1451ca1e6e Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 25 Jun 2015 15:00:35 -0700 Subject: zswap: runtime enable/disable Change the "enabled" parameter to be configurable at runtime. Remove the enabled check from init(), and move it to the frontswap store() function; when enabled, pages will be stored, and when disabled, pages won't be stored. This is almost identical to Seth's patch from 2 years ago: http://lkml.iu.edu/hypermail/linux/kernel/1307.2/04289.html [akpm@linux-foundation.org: tweak documentation] Signed-off-by: Dan Streetman Suggested-by: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/zswap.txt | 18 ++++++++++++++++-- mm/zswap.c | 12 +++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt index 00c3d31..8458c08 100644 --- a/Documentation/vm/zswap.txt +++ b/Documentation/vm/zswap.txt @@ -26,8 +26,22 @@ Zswap evicts pages from compressed cache on an LRU basis to the backing swap device when the compressed pool reaches its size limit. This requirement had been identified in prior community discussions. -To enabled zswap, the "enabled" attribute must be set to 1 at boot time. e.g. -zswap.enabled=1 +Zswap is disabled by default but can be enabled at boot time by setting +the "enabled" attribute to 1 at boot time. ie: zswap.enabled=1. Zswap +can also be enabled and disabled at runtime using the sysfs interface. +An example command to enable zswap at runtime, assuming sysfs is mounted +at /sys, is: + +echo 1 > /sys/modules/zswap/parameters/enabled + +When zswap is disabled at runtime it will stop storing pages that are +being swapped out. However, it will _not_ immediately write out or fault +back into memory all of the pages stored in the compressed pool. The +pages stored in zswap will remain in the compressed pool until they are +either invalidated or faulted back into memory. In order to force all +pages out of the compressed pool, a swapoff on the swap device(s) will +fault back into memory all swapped out pages, including those in the +compressed pool. Design: diff --git a/mm/zswap.c b/mm/zswap.c index 4249e82f..2d5727b 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -75,9 +75,10 @@ static u64 zswap_duplicate_entry; /********************************* * tunables **********************************/ -/* Enable/disable zswap (disabled by default, fixed at boot for now) */ -static bool zswap_enabled __read_mostly; -module_param_named(enabled, zswap_enabled, bool, 0444); + +/* Enable/disable zswap (disabled by default) */ +static bool zswap_enabled; +module_param_named(enabled, zswap_enabled, bool, 0644); /* Compressor to be used by zswap (fixed at boot for now) */ #define ZSWAP_COMPRESSOR_DEFAULT "lzo" @@ -648,7 +649,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, u8 *src, *dst; struct zswap_header *zhdr; - if (!tree) { + if (!zswap_enabled || !tree) { ret = -ENODEV; goto reject; } @@ -901,9 +902,6 @@ static int __init init_zswap(void) { gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; - if (!zswap_enabled) - return 0; - pr_info("loading zswap\n"); zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp, -- cgit v1.1 From cf41f5f496d68f2ced1fc77871c63d1c526fa100 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 25 Jun 2015 15:00:37 -0700 Subject: zpool: change pr_info to pr_debug Change the pr_info() calls to pr_debug(). There's no need for the extra verbosity in the log. Also change the msg formats to be consistent. Signed-off-by: Dan Streetman Cc: Kees Cook Cc: Minchan Kim Cc: Ganesh Mahendran Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zpool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/zpool.c b/mm/zpool.c index bacdab6..6b1f103 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -147,7 +147,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, struct zpool_driver *driver; struct zpool *zpool; - pr_info("creating pool type %s\n", type); + pr_debug("creating pool type %s\n", type); driver = zpool_get_driver(type); @@ -180,7 +180,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, return NULL; } - pr_info("created %s pool\n", type); + pr_debug("created pool type %s\n", type); spin_lock(&pools_lock); list_add(&zpool->list, &pools_head); @@ -202,7 +202,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, */ void zpool_destroy_pool(struct zpool *zpool) { - pr_info("destroying pool type %s\n", zpool->type); + pr_debug("destroying pool type %s\n", zpool->type); spin_lock(&pools_lock); list_del(&zpool->list); -- cgit v1.1 From 479305fd7172503772575997eb6f1b0a2bb4a107 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 25 Jun 2015 15:00:40 -0700 Subject: zpool: remove zpool_evict() Remove zpool_evict() helper function. As zbud is currently the only zpool implementation that supports eviction, add zpool and zpool_ops references to struct zbud_pool and directly call zpool_ops->evict(zpool, handle) on eviction. Currently zpool provides the zpool_evict helper which locks the zpool list lock and searches through all pools to find the specific one matching the caller, and call the corresponding zpool_ops->evict function. However, this is unnecessary, as the zbud pool can simply keep a reference to the zpool that created it, as well as the zpool_ops, and directly call the zpool_ops->evict function, when it needs to evict a page. This avoids a spinlock and list search in zpool for each eviction. Signed-off-by: Dan Streetman Cc: Seth Jennings Cc: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 5 ++--- mm/zbud.c | 23 +++++++++++++++++++---- mm/zpool.c | 29 +---------------------------- mm/zsmalloc.c | 3 ++- 4 files changed, 24 insertions(+), 36 deletions(-) diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 56529b3..d30eff3 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -81,7 +81,8 @@ struct zpool_driver { atomic_t refcount; struct list_head list; - void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops); + void *(*create)(char *name, gfp_t gfp, struct zpool_ops *ops, + struct zpool *zpool); void (*destroy)(void *pool); int (*malloc)(void *pool, size_t size, gfp_t gfp, @@ -102,6 +103,4 @@ void zpool_register_driver(struct zpool_driver *driver); int zpool_unregister_driver(struct zpool_driver *driver); -int zpool_evict(void *pool, unsigned long handle); - #endif diff --git a/mm/zbud.c b/mm/zbud.c index 2ee4e45..f3bf6f7 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -97,6 +97,10 @@ struct zbud_pool { struct list_head lru; u64 pages_nr; struct zbud_ops *ops; +#ifdef CONFIG_ZPOOL + struct zpool *zpool; + struct zpool_ops *zpool_ops; +#endif }; /* @@ -123,7 +127,10 @@ struct zbud_header { static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle) { - return zpool_evict(pool, handle); + if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict) + return pool->zpool_ops->evict(pool->zpool, handle); + else + return -ENOENT; } static struct zbud_ops zbud_zpool_ops = { @@ -131,9 +138,17 @@ static struct zbud_ops zbud_zpool_ops = { }; static void *zbud_zpool_create(char *name, gfp_t gfp, - struct zpool_ops *zpool_ops) + struct zpool_ops *zpool_ops, + struct zpool *zpool) { - return zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL); + struct zbud_pool *pool; + + pool = zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL); + if (pool) { + pool->zpool = zpool; + pool->zpool_ops = zpool_ops; + } + return pool; } static void zbud_zpool_destroy(void *pool) @@ -292,7 +307,7 @@ struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops) struct zbud_pool *pool; int i; - pool = kmalloc(sizeof(struct zbud_pool), gfp); + pool = kzalloc(sizeof(struct zbud_pool), gfp); if (!pool) return NULL; spin_lock_init(&pool->lock); diff --git a/mm/zpool.c b/mm/zpool.c index 6b1f103..722a4f6 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -73,33 +73,6 @@ int zpool_unregister_driver(struct zpool_driver *driver) } EXPORT_SYMBOL(zpool_unregister_driver); -/** - * zpool_evict() - evict callback from a zpool implementation. - * @pool: pool to evict from. - * @handle: handle to evict. - * - * This can be used by zpool implementations to call the - * user's evict zpool_ops struct evict callback. - */ -int zpool_evict(void *pool, unsigned long handle) -{ - struct zpool *zpool; - - spin_lock(&pools_lock); - list_for_each_entry(zpool, &pools_head, list) { - if (zpool->pool == pool) { - spin_unlock(&pools_lock); - if (!zpool->ops || !zpool->ops->evict) - return -EINVAL; - return zpool->ops->evict(zpool, handle); - } - } - spin_unlock(&pools_lock); - - return -ENOENT; -} -EXPORT_SYMBOL(zpool_evict); - static struct zpool_driver *zpool_get_driver(char *type) { struct zpool_driver *driver; @@ -170,7 +143,7 @@ struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, zpool->type = driver->type; zpool->driver = driver; - zpool->pool = driver->create(name, gfp, ops); + zpool->pool = driver->create(name, gfp, ops, zpool); zpool->ops = ops; if (!zpool->pool) { diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index c766240..0a7f81a 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -309,7 +309,8 @@ static void record_obj(unsigned long handle, unsigned long obj) #ifdef CONFIG_ZPOOL -static void *zs_zpool_create(char *name, gfp_t gfp, struct zpool_ops *zpool_ops) +static void *zs_zpool_create(char *name, gfp_t gfp, struct zpool_ops *zpool_ops, + struct zpool *zpool) { return zs_create_pool(name, gfp); } -- cgit v1.1 From 3fe111fc64018747fbc9c0c39c110c165b43cf10 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 25 Jun 2015 15:00:43 -0700 Subject: frv: remove unused inline function is_in_rom() The function is not used anywhere in the tree (anymore) and this is the last remaining instance, so remove it. Signed-off-by: Tobias Klauser Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/include/asm/sections.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/frv/include/asm/sections.h b/arch/frv/include/asm/sections.h index 17d0fb1..d03fb64 100644 --- a/arch/frv/include/asm/sections.h +++ b/arch/frv/include/asm/sections.h @@ -35,12 +35,6 @@ extern unsigned long __nongprelbss memory_start; extern unsigned long __nongprelbss memory_end; extern unsigned long __nongprelbss rom_length; -/* determine if we're running from ROM */ -static inline int is_in_rom(unsigned long addr) -{ - return 0; /* default case: not in ROM */ -} - #endif #endif #endif /* _ASM_SECTIONS_H */ -- cgit v1.1 From 0989e1f98e3ae30deebf732d0861ca47bb55e25b Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 25 Jun 2015 15:00:46 -0700 Subject: frv: use for_each_sg() This replaces the plain loop over the sglist array with for_each_sg() macro which consists of sg_next() function calls. Since frv doesn't select ARCH_HAS_SG_CHAIN, it is not necessary to use for_each_sg() in order to loop over each sg element. But this can help find problems with drivers that do not properly initialize their sg tables when CONFIG_DEBUG_SG is enabled. Signed-off-by: Akinobu Mita Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/mb93090-mb00/pci-dma-nommu.c | 10 ++++++---- arch/frv/mb93090-mb00/pci-dma.c | 7 ++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c index b99c2a7..8eeea0d 100644 --- a/arch/frv/mb93090-mb00/pci-dma-nommu.c +++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c @@ -119,14 +119,16 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, EXPORT_SYMBOL(dma_map_single); -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, +int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction) { int i; + struct scatterlist *sg; - for (i=0; i Date: Thu, 25 Jun 2015 15:00:48 -0700 Subject: avr32: use for_each_sg() This replaces the plain loop over the sglist array with for_each_sg() macro which consists of sg_next() function calls. Since avr32 doesn't select ARCH_HAS_SG_CHAIN, it is not necessary to use for_each_sg() in order to loop over each sg element. But this can help find problems with drivers that do not properly initialize their sg tables when CONFIG_DEBUG_SG is enabled. Signed-off-by: Akinobu Mita Cc: Haavard Skinnemoen Acked-by: Hans-Christian Egtvedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/avr32/include/asm/dma-mapping.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/avr32/include/asm/dma-mapping.h b/arch/avr32/include/asm/dma-mapping.h index b3d18f9..ae7ac92 100644 --- a/arch/avr32/include/asm/dma-mapping.h +++ b/arch/avr32/include/asm/dma-mapping.h @@ -209,17 +209,18 @@ dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, * the same here. */ static inline int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, +dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction) { int i; + struct scatterlist *sg; - for (i = 0; i < nents; i++) { + for_each_sg(sglist, sg, nents, i) { char *virt; - sg[i].dma_address = page_to_bus(sg_page(&sg[i])) + sg[i].offset; - virt = sg_virt(&sg[i]); - dma_cache_sync(dev, virt, sg[i].length, direction); + sg->dma_address = page_to_bus(sg_page(sg)) + sg->offset; + virt = sg_virt(sg); + dma_cache_sync(dev, virt, sg->length, direction); } return nents; @@ -321,14 +322,14 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, } static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sglist, int nents, enum dma_data_direction direction) { int i; + struct scatterlist *sg; - for (i = 0; i < nents; i++) { - dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, direction); - } + for_each_sg(sglist, sg, nents, i) + dma_cache_sync(dev, sg_virt(sg), sg->length, direction); } /* Now for the API extensions over the pci_ one */ -- cgit v1.1 From 4a00e9df293d010acbea118b9521e08cb85016c6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Jun 2015 15:00:51 -0700 Subject: prctl: more prctl(PR_SET_MM_*) checks Individual prctl(PR_SET_MM_*) calls do some checking to maintain a consistent view of mm->arg_start et al fields, but not enough. In particular PR_SET_MM_ARG_START/PR_SET_MM_ARG_END/ R_SET_MM_ENV_START/ PR_SET_MM_ENV_END only check that the address lies in an existing VMA, but don't check that the start address is lower than the end address _at all_. Consolidate all consistency checks, so there will be no difference in the future between PR_SET_MM_MAP and individual PR_SET_MM_* calls. The program below makes both ARGV and ENVP areas be reversed. It makes /proc/$PID/cmdline show garbage (it doesn't oops by luck). #include #include #include enum {PAGE_SIZE=4096}; int main(void) { void *p; p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); #define PR_SET_MM 35 #define PR_SET_MM_ARG_START 8 #define PR_SET_MM_ARG_END 9 #define PR_SET_MM_ENV_START 10 #define PR_SET_MM_ENV_END 11 prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long)p + PAGE_SIZE - 1, 0, 0); prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long)p, 0, 0); prctl(PR_SET_MM, PR_SET_MM_ENV_START, (unsigned long)p + PAGE_SIZE - 1, 0, 0); prctl(PR_SET_MM, PR_SET_MM_ENV_END, (unsigned long)p, 0, 0); pause(); return 0; } [akpm@linux-foundation.org: tidy code, tweak comment] Signed-off-by: Alexey Dobriyan Acked-by: Cyrill Gorcunov Cc: Jarod Wilson Cc: Jan Stancek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 158 ++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 91 insertions(+), 67 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 8571296..259fda2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1722,7 +1722,6 @@ exit_err: goto exit; } -#ifdef CONFIG_CHECKPOINT_RESTORE /* * WARNING: we don't require any capability here so be very careful * in what is allowed for modification from userspace. @@ -1818,6 +1817,7 @@ out: return error; } +#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) { struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; @@ -1902,10 +1902,41 @@ out: } #endif /* CONFIG_CHECKPOINT_RESTORE */ +static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + /* + * This doesn't move the auxiliary vector itself since it's pinned to + * mm_struct, but it permits filling the vector with new values. It's + * up to the caller to provide sane values here, otherwise userspace + * tools which use this vector might be unhappy. + */ + unsigned long user_auxv[AT_VECTOR_SIZE]; + + if (len > sizeof(user_auxv)) + return -EINVAL; + + if (copy_from_user(user_auxv, (const void __user *)addr, len)) + return -EFAULT; + + /* Make sure the last entry is always AT_NULL */ + user_auxv[AT_VECTOR_SIZE - 2] = 0; + user_auxv[AT_VECTOR_SIZE - 1] = 0; + + BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); + + task_lock(current); + memcpy(mm->saved_auxv, user_auxv, len); + task_unlock(current); + + return 0; +} + static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { struct mm_struct *mm = current->mm; + struct prctl_mm_map prctl_map; struct vm_area_struct *vma; int error; @@ -1925,6 +1956,9 @@ static int prctl_set_mm(int opt, unsigned long addr, if (opt == PR_SET_MM_EXE_FILE) return prctl_set_mm_exe_file(mm, (unsigned int)addr); + if (opt == PR_SET_MM_AUXV) + return prctl_set_auxv(mm, addr, arg4); + if (addr >= TASK_SIZE || addr < mmap_min_addr) return -EINVAL; @@ -1933,42 +1967,64 @@ static int prctl_set_mm(int opt, unsigned long addr, down_read(&mm->mmap_sem); vma = find_vma(mm, addr); + prctl_map.start_code = mm->start_code; + prctl_map.end_code = mm->end_code; + prctl_map.start_data = mm->start_data; + prctl_map.end_data = mm->end_data; + prctl_map.start_brk = mm->start_brk; + prctl_map.brk = mm->brk; + prctl_map.start_stack = mm->start_stack; + prctl_map.arg_start = mm->arg_start; + prctl_map.arg_end = mm->arg_end; + prctl_map.env_start = mm->env_start; + prctl_map.env_end = mm->env_end; + prctl_map.auxv = NULL; + prctl_map.auxv_size = 0; + prctl_map.exe_fd = -1; + switch (opt) { case PR_SET_MM_START_CODE: - mm->start_code = addr; + prctl_map.start_code = addr; break; case PR_SET_MM_END_CODE: - mm->end_code = addr; + prctl_map.end_code = addr; break; case PR_SET_MM_START_DATA: - mm->start_data = addr; + prctl_map.start_data = addr; break; case PR_SET_MM_END_DATA: - mm->end_data = addr; + prctl_map.end_data = addr; + break; + case PR_SET_MM_START_STACK: + prctl_map.start_stack = addr; break; - case PR_SET_MM_START_BRK: - if (addr <= mm->end_data) - goto out; - - if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr, - mm->end_data, mm->start_data)) - goto out; - - mm->start_brk = addr; + prctl_map.start_brk = addr; break; - case PR_SET_MM_BRK: - if (addr <= mm->end_data) - goto out; - - if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk, - mm->end_data, mm->start_data)) - goto out; - - mm->brk = addr; + prctl_map.brk = addr; break; + case PR_SET_MM_ARG_START: + prctl_map.arg_start = addr; + break; + case PR_SET_MM_ARG_END: + prctl_map.arg_end = addr; + break; + case PR_SET_MM_ENV_START: + prctl_map.env_start = addr; + break; + case PR_SET_MM_ENV_END: + prctl_map.env_end = addr; + break; + default: + goto out; + } + + error = validate_prctl_map(&prctl_map); + if (error) + goto out; + switch (opt) { /* * If command line arguments and environment * are placed somewhere else on stack, we can @@ -1985,52 +2041,20 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EFAULT; goto out; } - if (opt == PR_SET_MM_START_STACK) - mm->start_stack = addr; - else if (opt == PR_SET_MM_ARG_START) - mm->arg_start = addr; - else if (opt == PR_SET_MM_ARG_END) - mm->arg_end = addr; - else if (opt == PR_SET_MM_ENV_START) - mm->env_start = addr; - else if (opt == PR_SET_MM_ENV_END) - mm->env_end = addr; - break; - - /* - * This doesn't move auxiliary vector itself - * since it's pinned to mm_struct, but allow - * to fill vector with new values. It's up - * to a caller to provide sane values here - * otherwise user space tools which use this - * vector might be unhappy. - */ - case PR_SET_MM_AUXV: { - unsigned long user_auxv[AT_VECTOR_SIZE]; - - if (arg4 > sizeof(user_auxv)) - goto out; - up_read(&mm->mmap_sem); - - if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) - return -EFAULT; - - /* Make sure the last entry is always AT_NULL */ - user_auxv[AT_VECTOR_SIZE - 2] = 0; - user_auxv[AT_VECTOR_SIZE - 1] = 0; - - BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); - - task_lock(current); - memcpy(mm->saved_auxv, user_auxv, arg4); - task_unlock(current); - - return 0; - } - default: - goto out; } + mm->start_code = prctl_map.start_code; + mm->end_code = prctl_map.end_code; + mm->start_data = prctl_map.start_data; + mm->end_data = prctl_map.end_data; + mm->start_brk = prctl_map.start_brk; + mm->brk = prctl_map.brk; + mm->start_stack = prctl_map.start_stack; + mm->arg_start = prctl_map.arg_start; + mm->arg_end = prctl_map.arg_end; + mm->env_start = prctl_map.env_start; + mm->env_end = prctl_map.env_end; + error = 0; out: up_read(&mm->mmap_sem); -- cgit v1.1 From c2c0bb44620dece7ec97e28167e32c343da22867 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Jun 2015 15:00:54 -0700 Subject: proc: fix PAGE_SIZE limit of /proc/$PID/cmdline /proc/$PID/cmdline truncates output at PAGE_SIZE. It is easy to see with $ cat /proc/self/cmdline $(seq 1037) 2>/dev/null However, command line size was never limited to PAGE_SIZE but to 128 KB and relatively recently limitation was removed altogether. People noticed and ask questions: http://stackoverflow.com/questions/199130/how-do-i-increase-the-proc-pid-cmdline-4096-byte-limit seq file interface is not OK, because it kmalloc's for whole output and open + read(, 1) + sleep will pin arbitrary amounts of kernel memory. To not do that, limit must be imposed which is incompatible with arbitrary sized command lines. I apologize for hairy code, but this it direct consequence of command line layout in memory and hacks to support things like "init [3]". The loops are "unrolled" otherwise it is either macros which hide control flow or functions with 7-8 arguments with equal line count. There should be real setproctitle(2) or something. [akpm@linux-foundation.org: fix a billion min() warnings] Signed-off-by: Alexey Dobriyan Tested-by: Jarod Wilson Acked-by: Jarod Wilson Cc: Cyrill Gorcunov Cc: Jan Stancek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 196 insertions(+), 9 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 286a422..bd7a9af 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -196,18 +196,205 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static int proc_pid_cmdline(struct seq_file *m, struct pid_namespace *ns, - struct pid *pid, struct task_struct *task) +static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, + size_t _count, loff_t *pos) { + struct task_struct *tsk; + struct mm_struct *mm; + char *page; + unsigned long count = _count; + unsigned long arg_start, arg_end, env_start, env_end; + unsigned long len1, len2, len; + unsigned long p; + char c; + ssize_t rv; + + BUG_ON(*pos < 0); + + tsk = get_proc_task(file_inode(file)); + if (!tsk) + return -ESRCH; + mm = get_task_mm(tsk); + put_task_struct(tsk); + if (!mm) + return 0; + /* Check if process spawned far enough to have cmdline. */ + if (!mm->env_end) { + rv = 0; + goto out_mmput; + } + + page = (char *)__get_free_page(GFP_TEMPORARY); + if (!page) { + rv = -ENOMEM; + goto out_mmput; + } + + down_read(&mm->mmap_sem); + arg_start = mm->arg_start; + arg_end = mm->arg_end; + env_start = mm->env_start; + env_end = mm->env_end; + up_read(&mm->mmap_sem); + + BUG_ON(arg_start > arg_end); + BUG_ON(env_start > env_end); + + len1 = arg_end - arg_start; + len2 = env_end - env_start; + /* - * Rely on struct seq_operations::show() being called once - * per internal buffer allocation. See single_open(), traverse(). + * Inherently racy -- command line shares address space + * with code and data. */ - BUG_ON(m->size < PAGE_SIZE); - m->count += get_cmdline(task, m->buf, PAGE_SIZE); - return 0; + rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + if (rv <= 0) + goto out_free_page; + + rv = 0; + + if (c == '\0') { + /* Command line (set of strings) occupies whole ARGV. */ + if (len1 <= *pos) + goto out_free_page; + + p = arg_start + *pos; + len = len1 - *pos; + while (count > 0 && len > 0) { + unsigned int _count; + int nr_read; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + } + } else { + /* + * Command line (1 string) occupies ARGV and maybe + * extends into ENVP. + */ + if (len1 + len2 <= *pos) + goto skip_argv_envp; + if (len1 <= *pos) + goto skip_argv; + + p = arg_start + *pos; + len = len1 - *pos; + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* + * Command line can be shorter than whole ARGV + * even if last "marker" byte says it is not. + */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; + } +skip_argv: + /* + * Command line (1 string) occupies ARGV and + * extends into ENVP. + */ + if (len1 <= *pos) { + p = env_start + *pos - len1; + len = len1 + len2 - *pos; + } else { + p = env_start; + len = len2; + } + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* Find EOS. */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; + } +skip_argv_envp: + ; + } + +out_free_page: + free_page((unsigned long)page); +out_mmput: + mmput(mm); + if (rv > 0) + *pos += rv; + return rv; } +static const struct file_operations proc_pid_cmdline_ops = { + .read = proc_pid_cmdline_read, + .llseek = generic_file_llseek, +}; + static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -2572,7 +2759,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif - ONE("cmdline", S_IRUGO, proc_pid_cmdline), + REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), @@ -2918,7 +3105,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif - ONE("cmdline", S_IRUGO, proc_pid_cmdline), + REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_tid_maps_operations), -- cgit v1.1 From 2e13ba54a2682eea24918b87ad3edf70c2cf085b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20L=C3=B3pez=20Galeiras?= Date: Thu, 25 Jun 2015 15:00:57 -0700 Subject: fs, proc: introduce CONFIG_PROC_CHILDREN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 818411616baf ("fs, proc: introduce /proc//task//children entry") introduced the children entry for checkpoint restore and the file is only available on kernels configured with CONFIG_EXPERT and CONFIG_CHECKPOINT_RESTORE. This is available in most distributions (Fedora, Debian, Ubuntu, CoreOS) because they usually enable CONFIG_EXPERT and CONFIG_CHECKPOINT_RESTORE. But Arch does not enable CONFIG_EXPERT or CONFIG_CHECKPOINT_RESTORE. However, the children proc file is useful outside of checkpoint restore. I would like to use it in rkt. The rkt process exec() another program it does not control, and that other program will fork()+exec() a child process. I would like to find the pid of the child process from an external tool without iterating in /proc over all processes to find which one has a parent pid equal to rkt. This commit introduces CONFIG_PROC_CHILDREN and makes CONFIG_CHECKPOINT_RESTORE select it. This allows enabling /proc//task//children without needing to enable CONFIG_CHECKPOINT_RESTORE and CONFIG_EXPERT. Alban tested that /proc//task//children is present when the kernel is configured with CONFIG_PROC_CHILDREN=y but without CONFIG_CHECKPOINT_RESTORE Signed-off-by: Iago López Galeiras Tested-by: Alban Crequy Reviewed-by: Cyrill Gorcunov Cc: Oleg Nesterov Cc: Kees Cook Cc: Pavel Emelyanov Cc: Serge Hallyn Cc: KAMEZAWA Hiroyuki Cc: Alexander Viro Cc: Andy Lutomirski Cc: Djalal Harouni Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/Kconfig | 4 ++++ fs/proc/array.c | 4 ++-- fs/proc/base.c | 2 +- init/Kconfig | 1 + 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 2183fcf..d751fcb 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -71,3 +71,7 @@ config PROC_PAGE_MONITOR /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, /proc/kpagecount, and /proc/kpageflags. Disabling these interfaces will reduce the size of the kernel by approximately 4kb. + +config PROC_CHILDREN + bool "Include /proc//task//children file" + default n diff --git a/fs/proc/array.c b/fs/proc/array.c index 3f57dac..ce065cf 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -577,7 +577,7 @@ int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, return 0; } -#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_PROC_CHILDREN static struct pid * get_children_pid(struct inode *inode, struct pid *pid_prev, loff_t pos) { @@ -700,4 +700,4 @@ const struct file_operations proc_tid_children_operations = { .llseek = seq_lseek, .release = children_seq_release, }; -#endif /* CONFIG_CHECKPOINT_RESTORE */ +#endif /* CONFIG_PROC_CHILDREN */ diff --git a/fs/proc/base.c b/fs/proc/base.c index bd7a9af..1d540b3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3109,7 +3109,7 @@ static const struct pid_entry tid_base_stuff[] = { ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_tid_maps_operations), -#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_PROC_CHILDREN REG("children", S_IRUGO, proc_tid_children_operations), #endif #ifdef CONFIG_NUMA diff --git a/init/Kconfig b/init/Kconfig index b999fa3..6a930b7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1131,6 +1131,7 @@ endif # CGROUPS config CHECKPOINT_RESTORE bool "Checkpoint/restore support" if EXPERT + select PROC_CHILDREN default n help Enables additional kernel features in a sake of checkpoint/restore. -- cgit v1.1 From f6d133f877c8bb0a0934dc8c521c758ee771e901 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:00 -0700 Subject: compiler-gcc.h: neatening - Move the inline and noinline blocks together - Comment neatening - Alignment of __attribute__ uses - Consistent naming of __must_be_array macro argument - Multiline macro neatening Signed-off-by: Joe Perches Cc: Andi Kleen Cc: Michal Marek Cc: Segher Boessenkool Cc: Sasha Levin Cc: Anton Blanchard Cc: Alan Modra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 85 +++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 371e560..5c2c14e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -5,9 +5,9 @@ /* * Common definitions for all gcc versions go here. */ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) /* Optimization barrier */ @@ -46,55 +46,63 @@ * the inline assembly constraint from =g to =r, in this particular * case either is valid. */ -#define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ - (typeof(ptr)) (__ptr + (off)); }) +#define RELOC_HIDE(ptr, off) \ +({ \ + unsigned long __ptr; \ + __asm__ ("" : "=r"(__ptr) : "0"(ptr)); \ + (typeof(ptr)) (__ptr + (off)); \ +}) /* Make the optimizer believe the variable can be manipulated arbitrarily. */ -#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var)) +#define OPTIMIZER_HIDE_VAR(var) \ + __asm__ ("" : "=r" (var) : "0" (var)) #ifdef __CHECKER__ -#define __must_be_array(arr) 0 +#define __must_be_array(a) 0 #else /* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #endif /* * Force always-inline if the user requests it so via the .config, * or if gcc is too old: */ -#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ +#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -# define inline inline __attribute__((always_inline)) notrace -# define __inline__ __inline__ __attribute__((always_inline)) notrace -# define __inline __inline __attribute__((always_inline)) notrace +#define inline inline __attribute__((always_inline)) notrace +#define __inline__ __inline__ __attribute__((always_inline)) notrace +#define __inline __inline __attribute__((always_inline)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ -# define inline inline notrace -# define __inline__ __inline__ notrace -# define __inline __inline notrace +#define inline inline notrace +#define __inline__ __inline__ notrace +#define __inline __inline notrace #endif -#define __deprecated __attribute__((deprecated)) -#define __packed __attribute__((packed)) -#define __weak __attribute__((weak)) -#define __alias(symbol) __attribute__((alias(#symbol))) +#define __always_inline inline __attribute__((always_inline)) +#define noinline __attribute__((noinline)) + +#define __deprecated __attribute__((deprecated)) +#define __packed __attribute__((packed)) +#define __weak __attribute__((weak)) +#define __alias(symbol) __attribute__((alias(#symbol))) /* - * it doesn't make sense on ARM (currently the only user of __naked) to trace - * naked functions because then mcount is called without stack and frame pointer - * being set up and there is no chance to restore the lr register to the value - * before mcount was called. + * it doesn't make sense on ARM (currently the only user of __naked) + * to trace naked functions because then mcount is called without + * stack and frame pointer being set up and there is no chance to + * restore the lr register to the value before mcount was called. + * + * The asm() bodies of naked functions often depend on standard calling + * conventions, therefore they must be noinline and noclone. * - * The asm() bodies of naked functions often depend on standard calling conventions, - * therefore they must be noinline and noclone. GCC 4.[56] currently fail to enforce - * this, so we must do so ourselves. See GCC PR44290. + * GCC 4.[56] currently fail to enforce this, so we must do so ourselves. + * See GCC PR44290. */ -#define __naked __attribute__((naked)) noinline __noclone notrace +#define __naked __attribute__((naked)) noinline __noclone notrace -#define __noreturn __attribute__((noreturn)) +#define __noreturn __attribute__((noreturn)) /* * From the GCC manual: @@ -106,14 +114,13 @@ * would be. * [...] */ -#define __pure __attribute__((pure)) -#define __aligned(x) __attribute__((aligned(x))) -#define __printf(a, b) __attribute__((format(printf, a, b))) -#define __scanf(a, b) __attribute__((format(scanf, a, b))) -#define noinline __attribute__((noinline)) -#define __attribute_const__ __attribute__((__const__)) -#define __maybe_unused __attribute__((unused)) -#define __always_unused __attribute__((unused)) +#define __pure __attribute__((pure)) +#define __aligned(x) __attribute__((aligned(x))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __scanf(a, b) __attribute__((format(scanf, a, b))) +#define __attribute_const__ __attribute__((__const__)) +#define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) #define __gcc_header(x) #x #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) @@ -129,5 +136,3 @@ * code */ #define uninitialized_var(x) x = x - -#define __always_inline inline __attribute__((always_inline)) -- cgit v1.1 From cb984d101b30eb7478d32df56a0023e4603cba7f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:02 -0700 Subject: compiler-gcc: integrate the various compiler-gcc[345].h files As gcc major version numbers are going to advance rather rapidly in the future, there's no real value in separate files for each compiler version. Deduplicate some of the macros #defined in each file too. Neaten comments using normal kernel commenting style. Signed-off-by: Joe Perches Cc: Andi Kleen Cc: Michal Marek Cc: Segher Boessenkool Cc: Sasha Levin Cc: Anton Blanchard Cc: Alan Modra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 120 ++++++++++++++++++++++++++++++++++++++++-- include/linux/compiler-gcc3.h | 23 -------- include/linux/compiler-gcc4.h | 91 -------------------------------- include/linux/compiler-gcc5.h | 67 ----------------------- 4 files changed, 116 insertions(+), 185 deletions(-) delete mode 100644 include/linux/compiler-gcc3.h delete mode 100644 include/linux/compiler-gcc4.h delete mode 100644 include/linux/compiler-gcc5.h diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5c2c14e..dfaa7b3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -122,10 +122,122 @@ #define __maybe_unused __attribute__((unused)) #define __always_unused __attribute__((unused)) -#define __gcc_header(x) #x -#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) -#define gcc_header(x) _gcc_header(x) -#include gcc_header(__GNUC__) +/* gcc version specific checks */ + +#if GCC_VERSION < 30200 +# error Sorry, your compiler is too old - please upgrade it. +#endif + +#if GCC_VERSION < 30300 +# define __used __attribute__((__unused__)) +#else +# define __used __attribute__((__used__)) +#endif + +#ifdef CONFIG_GCOV_KERNEL +# if GCC_VERSION < 30400 +# error "GCOV profiling support for gcc versions below 3.4 not included" +# endif /* __GNUC_MINOR__ */ +#endif /* CONFIG_GCOV_KERNEL */ + +#if GCC_VERSION >= 30400 +#define __must_check __attribute__((warn_unused_result)) +#endif + +#if GCC_VERSION >= 40000 + +/* GCC 4.1.[01] miscompiles __weak */ +#ifdef __KERNEL__ +# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101 +# error Your version of gcc miscompiles the __weak directive +# endif +#endif + +#define __used __attribute__((__used__)) +#define __compiler_offsetof(a, b) \ + __builtin_offsetof(a, b) + +#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 +# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif + +#if GCC_VERSION >= 40300 +/* Mark functions as cold. gcc will assume any path leading to a call + * to them will be unlikely. This means a lot of manual unlikely()s + * are unnecessary now for any paths leading to the usual suspects + * like BUG(), printk(), panic() etc. [but let's keep them for now for + * older compilers] + * + * Early snapshots of gcc 4.3 don't support this and we can't detect this + * in the preprocessor, but we can live with this because they're unreleased. + * Maketime probing would be overkill here. + * + * gcc also has a __attribute__((__hot__)) to move hot functions into + * a special section, but I don't see any sense in this right now in + * the kernel context + */ +#define __cold __attribute__((__cold__)) + +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#ifndef __CHECKER__ +# define __compiletime_warning(message) __attribute__((warning(message))) +# define __compiletime_error(message) __attribute__((error(message))) +#endif /* __CHECKER__ */ +#endif /* GCC_VERSION >= 40300 */ + +#if GCC_VERSION >= 40500 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() + +/* Mark a function definition as prohibited from being cloned. */ +#define __noclone __attribute__((__noclone__)) + +#endif /* GCC_VERSION >= 40500 */ + +#if GCC_VERSION >= 40600 +/* + * Tell the optimizer that something else uses this function or variable. + */ +#define __visible __attribute__((externally_visible)) +#endif + +/* + * GCC 'asm goto' miscompiles certain code sequences: + * + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 + * + * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. + * + * (asm goto is automatically volatile - the naming reflects this.) + */ +#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#if GCC_VERSION >= 40400 +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#endif +#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) +#define __HAVE_BUILTIN_BSWAP16__ +#endif +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#if GCC_VERSION >= 50000 +#define KASAN_ABI_VERSION 4 +#elif GCC_VERSION >= 40902 +#define KASAN_ABI_VERSION 3 +#endif + +#endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) #define __noclone /* not needed */ diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h deleted file mode 100644 index 7d89feb..0000000 --- a/include/linux/compiler-gcc3.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include directly, include instead." -#endif - -#if GCC_VERSION < 30200 -# error Sorry, your compiler is too old - please upgrade it. -#endif - -#if GCC_VERSION >= 30300 -# define __used __attribute__((__used__)) -#else -# define __used __attribute__((__unused__)) -#endif - -#if GCC_VERSION >= 30400 -#define __must_check __attribute__((warn_unused_result)) -#endif - -#ifdef CONFIG_GCOV_KERNEL -# if GCC_VERSION < 30400 -# error "GCOV profiling support for gcc versions below 3.4 not included" -# endif /* __GNUC_MINOR__ */ -#endif /* CONFIG_GCOV_KERNEL */ diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h deleted file mode 100644 index 769e198..0000000 --- a/include/linux/compiler-gcc4.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include directly, include instead." -#endif - -/* GCC 4.1.[01] miscompiles __weak */ -#ifdef __KERNEL__ -# if GCC_VERSION >= 40100 && GCC_VERSION <= 40101 -# error Your version of gcc miscompiles the __weak directive -# endif -#endif - -#define __used __attribute__((__used__)) -#define __must_check __attribute__((warn_unused_result)) -#define __compiler_offsetof(a,b) __builtin_offsetof(a,b) - -#if GCC_VERSION >= 40100 && GCC_VERSION < 40600 -# define __compiletime_object_size(obj) __builtin_object_size(obj, 0) -#endif - -#if GCC_VERSION >= 40300 -/* Mark functions as cold. gcc will assume any path leading to a call - to them will be unlikely. This means a lot of manual unlikely()s - are unnecessary now for any paths leading to the usual suspects - like BUG(), printk(), panic() etc. [but let's keep them for now for - older compilers] - - Early snapshots of gcc 4.3 don't support this and we can't detect this - in the preprocessor, but we can live with this because they're unreleased. - Maketime probing would be overkill here. - - gcc also has a __attribute__((__hot__)) to move hot functions into - a special section, but I don't see any sense in this right now in - the kernel context */ -#define __cold __attribute__((__cold__)) - -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - -#ifndef __CHECKER__ -# define __compiletime_warning(message) __attribute__((warning(message))) -# define __compiletime_error(message) __attribute__((error(message))) -#endif /* __CHECKER__ */ -#endif /* GCC_VERSION >= 40300 */ - -#if GCC_VERSION >= 40500 -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - * - * Early snapshots of gcc 4.5 don't support this and we can't detect - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. - */ -#define unreachable() __builtin_unreachable() - -/* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) - -#endif /* GCC_VERSION >= 40500 */ - -#if GCC_VERSION >= 40600 -/* - * Tell the optimizer that something else uses this function or variable. - */ -#define __visible __attribute__((externally_visible)) -#endif - -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) - -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP -#if GCC_VERSION >= 40400 -#define __HAVE_BUILTIN_BSWAP32__ -#define __HAVE_BUILTIN_BSWAP64__ -#endif -#if GCC_VERSION >= 40800 || (defined(__powerpc__) && GCC_VERSION >= 40600) -#define __HAVE_BUILTIN_BSWAP16__ -#endif -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ - -#if GCC_VERSION >= 40902 -#define KASAN_ABI_VERSION 3 -#endif diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h deleted file mode 100644 index efee493..0000000 --- a/include/linux/compiler-gcc5.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef __LINUX_COMPILER_H -#error "Please don't include directly, include instead." -#endif - -#define __used __attribute__((__used__)) -#define __must_check __attribute__((warn_unused_result)) -#define __compiler_offsetof(a, b) __builtin_offsetof(a, b) - -/* Mark functions as cold. gcc will assume any path leading to a call - to them will be unlikely. This means a lot of manual unlikely()s - are unnecessary now for any paths leading to the usual suspects - like BUG(), printk(), panic() etc. [but let's keep them for now for - older compilers] - - Early snapshots of gcc 4.3 don't support this and we can't detect this - in the preprocessor, but we can live with this because they're unreleased. - Maketime probing would be overkill here. - - gcc also has a __attribute__((__hot__)) to move hot functions into - a special section, but I don't see any sense in this right now in - the kernel context */ -#define __cold __attribute__((__cold__)) - -#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) - -#ifndef __CHECKER__ -# define __compiletime_warning(message) __attribute__((warning(message))) -# define __compiletime_error(message) __attribute__((error(message))) -#endif /* __CHECKER__ */ - -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - * - * Early snapshots of gcc 4.5 don't support this and we can't detect - * this in the preprocessor, but we can live with this because they're - * unreleased. Really, we need to have autoconf for the kernel. - */ -#define unreachable() __builtin_unreachable() - -/* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) - -/* - * Tell the optimizer that something else uses this function or variable. - */ -#define __visible __attribute__((externally_visible)) - -/* - * GCC 'asm goto' miscompiles certain code sequences: - * - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 - * - * Work it around via a compiler barrier quirk suggested by Jakub Jelinek. - * - * (asm goto is automatically volatile - the naming reflects this.) - */ -#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) - -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP -#define __HAVE_BUILTIN_BSWAP32__ -#define __HAVE_BUILTIN_BSWAP64__ -#define __HAVE_BUILTIN_BSWAP16__ -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ - -#define KASAN_ABI_VERSION 4 -- cgit v1.1 From b86a50c3b5414eafdbee7f34af4a201a4a7817c2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 Jun 2015 15:01:05 -0700 Subject: compiler-intel: fix wrong compiler barrier() macro Cleanup commit 73679e508201 ("compiler-intel.h: Remove duplicate definition") removed the double definition of __memory_barrier() intrinsics. However, in doing so, it also removed the preceding #undef barrier by accident, meaning, the actual barrier() macro from compiler-gcc.h with inline asm is still in place as __GNUC__ is provided. Subsequently, barrier() can never be defined as __memory_barrier() from compiler.h since it already has a definition in place and if we trust the comment in compiler-intel.h, ecc doesn't support gcc specific asm statements. I don't have an ecc at hand (unsure if that's still used in the field?) and only found this by accident during code review, a revert of that cleanup would be simplest option. Fixes: 73679e508201 ("compiler-intel.h: Remove duplicate definition") Signed-off-by: Daniel Borkmann Reviewed-by: Pranith Kumar Cc: Pranith Kumar Cc: H. Peter Anvin Cc: mancha security Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-intel.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index 0c9a2f2..d4c7113 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -13,10 +13,12 @@ /* Intel ECC compiler doesn't support gcc specific asm stmts. * It uses intrinsics to do the equivalent things. */ +#undef barrier #undef barrier_data #undef RELOC_HIDE #undef OPTIMIZER_HIDE_VAR +#define barrier() __memory_barrier() #define barrier_data(ptr) barrier() #define RELOC_HIDE(ptr, off) \ -- cgit v1.1 From e34cadde3be793f179107228243242ccabdbb57c Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Thu, 25 Jun 2015 15:01:08 -0700 Subject: Pratyush Anand has moved pratyush.anand@st.com email-id doesn't exist anymore as I have left the company. Replace ST's id with pratyush.anand@gmail.com. Signed-off-by: Pratyush Anand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + Documentation/ABI/testing/configfs-spear-pcie-gadget | 2 +- Documentation/ABI/testing/sysfs-bus-usb-lvstest | 12 ++++++------ Documentation/misc-devices/spear-pcie-gadget.txt | 2 +- drivers/misc/spear13xx_pcie_gadget.c | 2 +- drivers/pci/host/pcie-spear13xx.c | 4 ++-- drivers/phy/phy-spear1310-miphy.c | 4 ++-- drivers/phy/phy-spear1340-miphy.c | 4 ++-- drivers/usb/misc/lvstest.c | 2 +- 9 files changed, 17 insertions(+), 16 deletions(-) diff --git a/.mailmap b/.mailmap index 6287004..145eaa9 100644 --- a/.mailmap +++ b/.mailmap @@ -95,6 +95,7 @@ Patrick Mochel Peter A Jonsson Peter Oruba Peter Oruba +Pratyush Anand Praveen BP Rajesh Shah Ralf Baechle diff --git a/Documentation/ABI/testing/configfs-spear-pcie-gadget b/Documentation/ABI/testing/configfs-spear-pcie-gadget index 8759881..840c324 100644 --- a/Documentation/ABI/testing/configfs-spear-pcie-gadget +++ b/Documentation/ABI/testing/configfs-spear-pcie-gadget @@ -1,7 +1,7 @@ What: /config/pcie-gadget Date: Feb 2011 KernelVersion: 2.6.37 -Contact: Pratyush Anand +Contact: Pratyush Anand Description: Interface is used to configure selected dual mode PCIe controller diff --git a/Documentation/ABI/testing/sysfs-bus-usb-lvstest b/Documentation/ABI/testing/sysfs-bus-usb-lvstest index aae68fc..5151290 100644 --- a/Documentation/ABI/testing/sysfs-bus-usb-lvstest +++ b/Documentation/ABI/testing/sysfs-bus-usb-lvstest @@ -4,14 +4,14 @@ driver is bound with root hub device. What: /sys/bus/usb/devices/.../get_dev_desc Date: March 2014 -Contact: Pratyush Anand +Contact: Pratyush Anand Description: Write to this node to issue "Get Device Descriptor" for Link Layer Validation device. It is needed for TD.7.06. What: /sys/bus/usb/devices/.../u1_timeout Date: March 2014 -Contact: Pratyush Anand +Contact: Pratyush Anand Description: Set "U1 timeout" for the downstream port where Link Layer Validation device is connected. Timeout value must be between 0 @@ -19,7 +19,7 @@ Description: What: /sys/bus/usb/devices/.../u2_timeout Date: March 2014 -Contact: Pratyush Anand +Contact: Pratyush Anand Description: Set "U2 timeout" for the downstream port where Link Layer Validation device is connected. Timeout value must be between 0 @@ -27,21 +27,21 @@ Description: What: /sys/bus/usb/devices/.../hot_reset Date: March 2014 -Contact: Pratyush Anand +Contact: Pratyush Anand Description: Write to this node to issue "Reset" for Link Layer Validation device. It is needed for TD.7.29, TD.7.31, TD.7.34 and TD.7.35. What: /sys/bus/usb/devices/.../u3_entry Date: March 2014 -Contact: Pratyush Anand +Contact: Pratyush Anand Description: Write to this node to issue "U3 entry" for Link Layer Validation device. It is needed for TD.7.35 and TD.7.36. What: /sys/bus/usb/devices/.../u3_exit Date: March 2014 -Contact: Pratyush Anand +Contact: Pratyush Anand Description: Write to this node to issue "U3 exit" for Link Layer Validation device. It is needed for TD.7.36. diff --git a/Documentation/misc-devices/spear-pcie-gadget.txt b/Documentation/misc-devices/spear-pcie-gadget.txt index 02c13ef..89b88de 100644 --- a/Documentation/misc-devices/spear-pcie-gadget.txt +++ b/Documentation/misc-devices/spear-pcie-gadget.txt @@ -2,7 +2,7 @@ Spear PCIe Gadget Driver: Author ============= -Pratyush Anand (pratyush.anand@st.com) +Pratyush Anand (pratyush.anand@gmail.com) Location ============ diff --git a/drivers/misc/spear13xx_pcie_gadget.c b/drivers/misc/spear13xx_pcie_gadget.c index fe3ad0c..b8374cd 100644 --- a/drivers/misc/spear13xx_pcie_gadget.c +++ b/drivers/misc/spear13xx_pcie_gadget.c @@ -2,7 +2,7 @@ * drivers/misc/spear13xx_pcie_gadget.c * * Copyright (C) 2010 ST Microelectronics - * Pratyush Anand + * Pratyush Anand * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c index dfec428..3ba275a 100644 --- a/drivers/pci/host/pcie-spear13xx.c +++ b/drivers/pci/host/pcie-spear13xx.c @@ -4,7 +4,7 @@ * SPEAr13xx PCIe Glue Layer Source Code * * Copyright (C) 2010-2014 ST Microelectronics - * Pratyush Anand + * Pratyush Anand * Mohit Kumar * * This file is licensed under the terms of the GNU General Public @@ -386,5 +386,5 @@ static int __init spear13xx_pcie_init(void) module_init(spear13xx_pcie_init); MODULE_DESCRIPTION("ST Microelectronics SPEAr13xx PCIe host controller driver"); -MODULE_AUTHOR("Pratyush Anand "); +MODULE_AUTHOR("Pratyush Anand "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/phy/phy-spear1310-miphy.c b/drivers/phy/phy-spear1310-miphy.c index 65ae640..85d3e11 100644 --- a/drivers/phy/phy-spear1310-miphy.c +++ b/drivers/phy/phy-spear1310-miphy.c @@ -2,7 +2,7 @@ * ST SPEAr1310-miphy driver * * Copyright (C) 2014 ST Microelectronics - * Pratyush Anand + * Pratyush Anand * Mohit Kumar * * This program is free software; you can redistribute it and/or modify @@ -257,5 +257,5 @@ static struct platform_driver spear1310_miphy_driver = { module_platform_driver(spear1310_miphy_driver); MODULE_DESCRIPTION("ST SPEAR1310-MIPHY driver"); -MODULE_AUTHOR("Pratyush Anand "); +MODULE_AUTHOR("Pratyush Anand "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/phy/phy-spear1340-miphy.c b/drivers/phy/phy-spear1340-miphy.c index 1a00c28..2f98eba 100644 --- a/drivers/phy/phy-spear1340-miphy.c +++ b/drivers/phy/phy-spear1340-miphy.c @@ -2,7 +2,7 @@ * ST spear1340-miphy driver * * Copyright (C) 2014 ST Microelectronics - * Pratyush Anand + * Pratyush Anand * Mohit Kumar * * This program is free software; you can redistribute it and/or modify @@ -290,5 +290,5 @@ static struct platform_driver spear1340_miphy_driver = { module_platform_driver(spear1340_miphy_driver); MODULE_DESCRIPTION("ST SPEAR1340-MIPHY driver"); -MODULE_AUTHOR("Pratyush Anand "); +MODULE_AUTHOR("Pratyush Anand "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c index 62cb8cd..86b4e4b 100644 --- a/drivers/usb/misc/lvstest.c +++ b/drivers/usb/misc/lvstest.c @@ -4,7 +4,7 @@ * Test pattern generation for Link Layer Validation System Tests * * Copyright (C) 2014 ST Microelectronics - * Pratyush Anand + * Pratyush Anand * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any -- cgit v1.1 From 9c5dcdd0c71b819bf8e5b50a17d1ea89fe68e4d7 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Thu, 25 Jun 2015 15:01:11 -0700 Subject: Mohit Kumar has moved Mohit's email-id doesn't exist anymore as he has left the company. Replace ST's id with mohit.kumar.dhaka@gmail.com. Signed-off-by: Pratyush Anand Cc: Mohit Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + drivers/pci/host/pcie-spear13xx.c | 2 +- drivers/phy/phy-spear1310-miphy.c | 2 +- drivers/phy/phy-spear1340-miphy.c | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index 145eaa9..bcc72d0 100644 --- a/.mailmap +++ b/.mailmap @@ -84,6 +84,7 @@ Mayuresh Janorkar Michael Buesch Michel Dänzer Mitesh shah +Mohit Kumar Morten Welinder Morten Welinder Morten Welinder diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c index 3ba275a..c49fbdc 100644 --- a/drivers/pci/host/pcie-spear13xx.c +++ b/drivers/pci/host/pcie-spear13xx.c @@ -5,7 +5,7 @@ * * Copyright (C) 2010-2014 ST Microelectronics * Pratyush Anand - * Mohit Kumar + * Mohit Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/phy/phy-spear1310-miphy.c b/drivers/phy/phy-spear1310-miphy.c index 85d3e11..45d0005 100644 --- a/drivers/phy/phy-spear1310-miphy.c +++ b/drivers/phy/phy-spear1310-miphy.c @@ -3,7 +3,7 @@ * * Copyright (C) 2014 ST Microelectronics * Pratyush Anand - * Mohit Kumar + * Mohit Kumar * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/drivers/phy/phy-spear1340-miphy.c b/drivers/phy/phy-spear1340-miphy.c index 2f98eba..494240d 100644 --- a/drivers/phy/phy-spear1340-miphy.c +++ b/drivers/phy/phy-spear1340-miphy.c @@ -3,7 +3,7 @@ * * Copyright (C) 2014 ST Microelectronics * Pratyush Anand - * Mohit Kumar + * Mohit Kumar * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as -- cgit v1.1 From 4d5b367ca45e91fe33d0608666733a1cce5a8f5d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 25 Jun 2015 15:01:13 -0700 Subject: mailmap: add rdunlap email auto-correction To avoid having xenotime bounce when things like get_maintainers gives me addresses, add Randy's current address. Signed-off-by: Kees Cook Acked-by: Randy Dunlap Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index bcc72d0..977f958 100644 --- a/.mailmap +++ b/.mailmap @@ -101,6 +101,7 @@ Praveen BP Rajesh Shah Ralf Baechle Ralf Wildenhues +Randy Dunlap Rémi Denis-Courmont Ricardo Ribalda Delgado Rudolf Marek -- cgit v1.1 From 8c7fbe5795a016259445a61e072eb0118aaf6a61 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:16 -0700 Subject: stddef.h: move offsetofend inside #ifndef/#endif guard, neaten Commit 3876488444e7 ("include/stddef.h: Move offsetofend() from vfio.h to a generic kernel header") added offsetofend outside the normal include #ifndef/#endif guard. Move it inside. Miscellanea: o remove unnecessary blank line o standardize offsetof macros whitespace style Signed-off-by: Joe Perches Cc: Denys Vlasenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stddef.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 076af43..9c61c7c 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -3,7 +3,6 @@ #include - #undef NULL #define NULL ((void *)0) @@ -14,10 +13,9 @@ enum { #undef offsetof #ifdef __compiler_offsetof -#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) +#define offsetof(TYPE, MEMBER) __compiler_offsetof(TYPE, MEMBER) #else -#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) -#endif +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) #endif /** @@ -28,3 +26,5 @@ enum { */ #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) + +#endif -- cgit v1.1 From 3033f14ab78c326871a4902591c2518410add24a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 25 Jun 2015 15:01:19 -0700 Subject: clone: support passing tls argument via C rather than pt_regs magic clone has some of the quirkiest syscall handling in the kernel, with a pile of special cases, historical curiosities, and architecture-specific calling conventions. In particular, clone with CLONE_SETTLS accepts a parameter "tls" that the C entry point completely ignores and some assembly entry points overwrite; instead, the low-level arch-specific code pulls the tls parameter out of the arch-specific register captured as part of pt_regs on entry to the kernel. That's a massive hack, and it makes the arch-specific code only work when called via the specific existing syscall entry points; because of this hack, any new clone-like system call would have to accept an identical tls argument in exactly the same arch-specific position, rather than providing a unified system call entry point across architectures. The first patch allows architectures to handle the tls argument via normal C parameter passing, if they opt in by selecting HAVE_COPY_THREAD_TLS. The second patch makes 32-bit and 64-bit x86 opt into this. These two patches came out of the clone4 series, which isn't ready for this merge window, but these first two cleanup patches were entirely uncontroversial and have acks. I'd like to go ahead and submit these two so that other architectures can begin building on top of this and opting into HAVE_COPY_THREAD_TLS. However, I'm also happy to wait and send these through the next merge window (along with v3 of clone4) if anyone would prefer that. This patch (of 2): clone with CLONE_SETTLS accepts an argument to set the thread-local storage area for the new thread. sys_clone declares an int argument tls_val in the appropriate point in the argument list (based on the various CLONE_BACKWARDS variants), but doesn't actually use or pass along that argument. Instead, sys_clone calls do_fork, which calls copy_process, which calls the arch-specific copy_thread, and copy_thread pulls the corresponding syscall argument out of the pt_regs captured at kernel entry (knowing what argument of clone that architecture passes tls in). Apart from being awful and inscrutable, that also only works because only one code path into copy_thread can pass the CLONE_SETTLS flag, and that code path comes from sys_clone with its architecture-specific argument-passing order. This prevents introducing a new version of the clone system call without propagating the same architecture-specific position of the tls argument. However, there's no reason to pull the argument out of pt_regs when sys_clone could just pass it down via C function call arguments. Introduce a new CONFIG_HAVE_COPY_THREAD_TLS for architectures to opt into, and a new copy_thread_tls that accepts the tls parameter as an additional unsigned long (syscall-argument-sized) argument. Change sys_clone's tls argument to an unsigned long (which does not change the ABI), and pass that down to copy_thread_tls. Architectures that don't opt into copy_thread_tls will continue to ignore the C argument to sys_clone in favor of the pt_regs captured at kernel entry, and thus will be unable to introduce new versions of the clone syscall. Patch co-authored by Josh Triplett and Thiago Macieira. Signed-off-by: Josh Triplett Acked-by: Andy Lutomirski Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thiago Macieira Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 7 ++++++ arch/s390/kernel/compat_wrapper.c | 2 +- drivers/misc/kgdbts.c | 2 +- include/linux/sched.h | 15 ++++++++++++ include/linux/syscalls.h | 6 ++--- kernel/fork.c | 48 ++++++++++++++++++++++++++------------- 6 files changed, 59 insertions(+), 21 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index a65eafb..bec6666 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -499,6 +499,13 @@ config ARCH_HAS_ELF_RANDOMIZE - arch_mmap_rnd() - arch_randomize_brk() +config HAVE_COPY_THREAD_TLS + bool + help + Architecture provides copy_thread_tls to accept tls argument via + normal C parameter passing, rather than extracting the syscall + argument from pt_regs. + # # ABI hall of shame # diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index d7fa2f0..f8498dd 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -202,7 +202,7 @@ COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); -COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, unsigned long, tls); COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 36f5d52..9a60bd4d3 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -220,7 +220,7 @@ static unsigned long lookup_addr(char *arg) else if (!strcmp(arg, "sys_open")) addr = (unsigned long)do_sys_open; else if (!strcmp(arg, "do_fork")) - addr = (unsigned long)do_fork; + addr = (unsigned long)_do_fork; else if (!strcmp(arg, "hw_break_val")) addr = (unsigned long)&hw_break_val; addr = (unsigned long) dereference_function_descriptor((void *)addr); diff --git a/include/linux/sched.h b/include/linux/sched.h index 6633e83..93ed0b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2556,8 +2556,22 @@ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); +#ifdef CONFIG_HAVE_COPY_THREAD_TLS +extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); +#else extern int copy_thread(unsigned long, unsigned long, unsigned long, struct task_struct *); + +/* Architectures that haven't opted into copy_thread_tls get the tls argument + * via pt_regs, so ignore the tls argument passed via C. */ +static inline int copy_thread_tls( + unsigned long clone_flags, unsigned long sp, unsigned long arg, + struct task_struct *p, unsigned long tls) +{ + return copy_thread(clone_flags, sp, arg, p); +} +#endif extern void flush_thread(void); extern void exit_thread(void); @@ -2576,6 +2590,7 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); +extern long _do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *, unsigned long); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 76d1e38..bb51bec 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -827,15 +827,15 @@ asmlinkage long sys_syncfs(int fd); asmlinkage long sys_fork(void); asmlinkage long sys_vfork(void); #ifdef CONFIG_CLONE_BACKWARDS -asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int, +asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, unsigned long, int __user *); #else #ifdef CONFIG_CLONE_BACKWARDS3 asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *, - int __user *, int); + int __user *, unsigned long); #else asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, - int __user *, int); + int __user *, unsigned long); #endif #endif diff --git a/kernel/fork.c b/kernel/fork.c index 0bb88b5..4c95cb3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1238,7 +1238,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, - int trace) + int trace, + unsigned long tls) { int retval; struct task_struct *p; @@ -1447,7 +1448,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; - retval = copy_thread(clone_flags, stack_start, stack_size, p); + retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); if (retval) goto bad_fork_cleanup_io; @@ -1659,7 +1660,7 @@ static inline void init_idle_pids(struct pid_link *links) struct task_struct *fork_idle(int cpu) { struct task_struct *task; - task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); + task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0); if (!IS_ERR(task)) { init_idle_pids(task->pids); init_idle(task, cpu); @@ -1674,11 +1675,12 @@ struct task_struct *fork_idle(int cpu) * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ -long do_fork(unsigned long clone_flags, +long _do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *parent_tidptr, - int __user *child_tidptr) + int __user *child_tidptr, + unsigned long tls) { struct task_struct *p; int trace = 0; @@ -1703,7 +1705,7 @@ long do_fork(unsigned long clone_flags, } p = copy_process(clone_flags, stack_start, stack_size, - child_tidptr, NULL, trace); + child_tidptr, NULL, trace, tls); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. @@ -1744,20 +1746,34 @@ long do_fork(unsigned long clone_flags, return nr; } +#ifndef CONFIG_HAVE_COPY_THREAD_TLS +/* For compatibility with architectures that call do_fork directly rather than + * using the syscall entry points below. */ +long do_fork(unsigned long clone_flags, + unsigned long stack_start, + unsigned long stack_size, + int __user *parent_tidptr, + int __user *child_tidptr) +{ + return _do_fork(clone_flags, stack_start, stack_size, + parent_tidptr, child_tidptr, 0); +} +#endif + /* * Create a kernel thread. */ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, - (unsigned long)arg, NULL, NULL); + return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, + (unsigned long)arg, NULL, NULL, 0); } #ifdef __ARCH_WANT_SYS_FORK SYSCALL_DEFINE0(fork) { #ifdef CONFIG_MMU - return do_fork(SIGCHLD, 0, 0, NULL, NULL); + return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); #else /* can not support in nommu mode */ return -EINVAL; @@ -1768,8 +1784,8 @@ SYSCALL_DEFINE0(fork) #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, - 0, NULL, NULL); + return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, + 0, NULL, NULL, 0); } #endif @@ -1777,27 +1793,27 @@ SYSCALL_DEFINE0(vfork) #ifdef CONFIG_CLONE_BACKWARDS SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, - int, tls_val, + unsigned long, tls, int __user *, child_tidptr) #elif defined(CONFIG_CLONE_BACKWARDS2) SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #elif defined(CONFIG_CLONE_BACKWARDS3) SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, stack_size, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #endif { - return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr); + return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); } #endif -- cgit v1.1 From 4ae555a5313a302d36e34b3273545e4088c37cce Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 25 Jun 2015 15:01:22 -0700 Subject: drivers/misc/altera-stapl/altera.c: remove extraneous KERN_INFO prefix The KERN_INFO prefix is being prepended to KERN_DEBUG when using the dprink macro, Remove it as it is extraneous since we are printing the message out as debug via dprintk(). Fixes smatch warning: drivers/misc/altera-stapl/altera.c:2454 altera_init() warn: KERN_* level not at start of string Signed-off-by: Colin Ian King Cc: Rasmus Villemoes Cc: Igor M. Liplianin Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/altera-stapl/altera.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index bca2630..f53e217 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -2451,7 +2451,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw) astate->config = config; if (!astate->config->jtag_io) { - dprintk(KERN_INFO "%s: using byteblaster!\n", __func__); + dprintk("%s: using byteblaster!\n", __func__); astate->config->jtag_io = netup_jtag_io_lpt; } -- cgit v1.1 From d43ff430f434d862db59582c0f1f02382a678036 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:24 -0700 Subject: printk: guard the amount written per line by devkmsg_read() This patchset updates netconsole so that it can emit messages with the same header as used in /dev/kmsg which gives neconsole receiver full log information which enables things like structured logging and detection of lost messages. This patch (of 7): devkmsg_read() uses 8k buffer and assumes that the formatted output message won't overrun which seems safe given LOG_LINE_MAX, the current use of dict and the escaping method being used; however, we're planning to use devkmsg formatting wider and accounting for the buffer size properly isn't that complicated. This patch defines CONSOLE_EXT_LOG_MAX as 8192 and updates devkmsg_read() so that it limits output accordingly. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Reviewed-by: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 2 ++ kernel/printk/printk.c | 34 ++++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 9b30871..58b1fec 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -30,6 +30,8 @@ static inline const char *printk_skip_level(const char *buffer) return buffer; } +#define CONSOLE_EXT_LOG_MAX 8192 + /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c099b08..a115490 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -505,6 +505,11 @@ int check_syslog_permissions(int type, bool from_file) return security_syslog(type); } +static void append_char(char **pp, char *e, char c) +{ + if (*pp < e) + *(*pp)++ = c; +} /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { @@ -512,7 +517,7 @@ struct devkmsg_user { u32 idx; enum log_flags prev; struct mutex lock; - char buf[8192]; + char buf[CONSOLE_EXT_LOG_MAX]; }; static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) @@ -570,6 +575,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, { struct devkmsg_user *user = file->private_data; struct printk_log *msg; + char *p, *e; u64 ts_usec; size_t i; char cont = '-'; @@ -579,6 +585,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, if (!user) return -EBADF; + p = user->buf; + e = user->buf + sizeof(user->buf); + ret = mutex_lock_interruptible(&user->lock); if (ret) return ret; @@ -625,9 +634,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))) cont = '+'; - len = sprintf(user->buf, "%u,%llu,%llu,%c;", - (msg->facility << 3) | msg->level, - user->seq, ts_usec, cont); + p += scnprintf(p, e - p, "%u,%llu,%llu,%c;", + (msg->facility << 3) | msg->level, + user->seq, ts_usec, cont); user->prev = msg->flags; /* escape non-printable characters */ @@ -635,11 +644,11 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, unsigned char c = log_text(msg)[i]; if (c < ' ' || c >= 127 || c == '\\') - len += sprintf(user->buf + len, "\\x%02x", c); + p += scnprintf(p, e - p, "\\x%02x", c); else - user->buf[len++] = c; + append_char(&p, e, c); } - user->buf[len++] = '\n'; + append_char(&p, e, '\n'); if (msg->dict_len) { bool line = true; @@ -648,30 +657,31 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, unsigned char c = log_dict(msg)[i]; if (line) { - user->buf[len++] = ' '; + append_char(&p, e, ' '); line = false; } if (c == '\0') { - user->buf[len++] = '\n'; + append_char(&p, e, '\n'); line = true; continue; } if (c < ' ' || c >= 127 || c == '\\') { - len += sprintf(user->buf + len, "\\x%02x", c); + p += scnprintf(p, e - p, "\\x%02x", c); continue; } - user->buf[len++] = c; + append_char(&p, e, c); } - user->buf[len++] = '\n'; + append_char(&p, e, '\n'); } user->idx = log_next(user->idx); user->seq++; raw_spin_unlock_irq(&logbuf_lock); + len = p - user->buf; if (len > count) { ret = -EINVAL; goto out; -- cgit v1.1 From 0a295e67ec19d59bdb146e0b60ac9659104f2215 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:27 -0700 Subject: printk: factor out message formatting from devkmsg_read() The extended message formatting used for /dev/kmsg will be used implement extended consoles. Factor out msg_print_ext_header() and msg_print_ext_body() from devkmsg_read(). This is pure restructuring. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Reviewed-by: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 146 +++++++++++++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 66 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a115490..51ce4f1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -511,6 +511,81 @@ static void append_char(char **pp, char *e, char c) *(*pp)++ = c; } +static ssize_t msg_print_ext_header(char *buf, size_t size, + struct printk_log *msg, u64 seq, + enum log_flags prev_flags) +{ + u64 ts_usec = msg->ts_nsec; + char cont = '-'; + + do_div(ts_usec, 1000); + + /* + * If we couldn't merge continuation line fragments during the print, + * export the stored flags to allow an optional external merge of the + * records. Merging the records isn't always neccessarily correct, like + * when we hit a race during printing. In most cases though, it produces + * better readable output. 'c' in the record flags mark the first + * fragment of a line, '+' the following. + */ + if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT)) + cont = 'c'; + else if ((msg->flags & LOG_CONT) || + ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX))) + cont = '+'; + + return scnprintf(buf, size, "%u,%llu,%llu,%c;", + (msg->facility << 3) | msg->level, seq, ts_usec, cont); +} + +static ssize_t msg_print_ext_body(char *buf, size_t size, + char *dict, size_t dict_len, + char *text, size_t text_len) +{ + char *p = buf, *e = buf + size; + size_t i; + + /* escape non-printable characters */ + for (i = 0; i < text_len; i++) { + unsigned char c = text[i]; + + if (c < ' ' || c >= 127 || c == '\\') + p += scnprintf(p, e - p, "\\x%02x", c); + else + append_char(&p, e, c); + } + append_char(&p, e, '\n'); + + if (dict_len) { + bool line = true; + + for (i = 0; i < dict_len; i++) { + unsigned char c = dict[i]; + + if (line) { + append_char(&p, e, ' '); + line = false; + } + + if (c == '\0') { + append_char(&p, e, '\n'); + line = true; + continue; + } + + if (c < ' ' || c >= 127 || c == '\\') { + p += scnprintf(p, e - p, "\\x%02x", c); + continue; + } + + append_char(&p, e, c); + } + append_char(&p, e, '\n'); + } + + return p - buf; +} + /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { u64 seq; @@ -575,19 +650,12 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, { struct devkmsg_user *user = file->private_data; struct printk_log *msg; - char *p, *e; - u64 ts_usec; - size_t i; - char cont = '-'; size_t len; ssize_t ret; if (!user) return -EBADF; - p = user->buf; - e = user->buf + sizeof(user->buf); - ret = mutex_lock_interruptible(&user->lock); if (ret) return ret; @@ -617,71 +685,17 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, } msg = log_from_idx(user->idx); - ts_usec = msg->ts_nsec; - do_div(ts_usec, 1000); + len = msg_print_ext_header(user->buf, sizeof(user->buf), + msg, user->seq, user->prev); + len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, + log_dict(msg), msg->dict_len, + log_text(msg), msg->text_len); - /* - * If we couldn't merge continuation line fragments during the print, - * export the stored flags to allow an optional external merge of the - * records. Merging the records isn't always neccessarily correct, like - * when we hit a race during printing. In most cases though, it produces - * better readable output. 'c' in the record flags mark the first - * fragment of a line, '+' the following. - */ - if (msg->flags & LOG_CONT && !(user->prev & LOG_CONT)) - cont = 'c'; - else if ((msg->flags & LOG_CONT) || - ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))) - cont = '+'; - - p += scnprintf(p, e - p, "%u,%llu,%llu,%c;", - (msg->facility << 3) | msg->level, - user->seq, ts_usec, cont); user->prev = msg->flags; - - /* escape non-printable characters */ - for (i = 0; i < msg->text_len; i++) { - unsigned char c = log_text(msg)[i]; - - if (c < ' ' || c >= 127 || c == '\\') - p += scnprintf(p, e - p, "\\x%02x", c); - else - append_char(&p, e, c); - } - append_char(&p, e, '\n'); - - if (msg->dict_len) { - bool line = true; - - for (i = 0; i < msg->dict_len; i++) { - unsigned char c = log_dict(msg)[i]; - - if (line) { - append_char(&p, e, ' '); - line = false; - } - - if (c == '\0') { - append_char(&p, e, '\n'); - line = true; - continue; - } - - if (c < ' ' || c >= 127 || c == '\\') { - p += scnprintf(p, e - p, "\\x%02x", c); - continue; - } - - append_char(&p, e, c); - } - append_char(&p, e, '\n'); - } - user->idx = log_next(user->idx); user->seq++; raw_spin_unlock_irq(&logbuf_lock); - len = p - user->buf; if (len > count) { ret = -EINVAL; goto out; -- cgit v1.1 From 6fe29354befe4c46eb308b662155d4d8017358e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:30 -0700 Subject: printk: implement support for extended console drivers printk log_buf keeps various metadata for each message including its sequence number and timestamp. The metadata is currently available only through /dev/kmsg and stripped out before passed onto console drivers. We want this metadata to be available to console drivers too so that console consumers can get full information including the metadata and dictionary, which among other things can be used to detect whether messages got lost in transit. This patch implements support for extended console drivers. Consoles can indicate that they want extended messages by setting the new CON_EXTENDED flag and they'll be fed messages formatted the same way as /dev/kmsg. ",,,;\n" If extended consoles exist, in-kernel fragment assembly is disabled. This ensures that all messages emitted to consoles have full metadata including sequence number. The contflag carries enough information to reassemble the fragments from the reader side trivially. Note that this only affects /dev/kmsg. Regular console and /proc/kmsg outputs are not affected by this change. * Extended message formatting for console drivers is enabled iff there are registered extended consoles. * Comment describing /dev/kmsg message format updated to add missing contflag field and help distinguishing variable from verbatim terms. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Reviewed-by: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/ABI/testing/dev-kmsg | 9 ++++++ include/linux/console.h | 1 + kernel/printk/printk.c | 66 +++++++++++++++++++++++++++++++++----- 3 files changed, 68 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg index bb820be..fff817e 100644 --- a/Documentation/ABI/testing/dev-kmsg +++ b/Documentation/ABI/testing/dev-kmsg @@ -98,4 +98,13 @@ Description: The /dev/kmsg character device node provides userspace access logic is used internally when messages are printed to the console, /proc/kmsg or the syslog() syscall. + By default, kernel tries to avoid fragments by concatenating + when it can and fragments are rare; however, when extended + console support is enabled, the in-kernel concatenation is + disabled and /dev/kmsg output will contain more fragments. If + the log consumer performs concatenation, the end result + should be the same. In the future, the in-kernel concatenation + may be removed entirely and /dev/kmsg users are recommended to + implement fragment handling. + Users: dmesg(1), userspace kernel log consumers diff --git a/include/linux/console.h b/include/linux/console.h index 9f50fb4..bd19434 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -115,6 +115,7 @@ static inline int con_debug_leave(void) #define CON_BOOT (8) #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ #define CON_BRL (32) /* Used for a braille device */ +#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ struct console { char name[16]; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 51ce4f1..ae980dc 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -85,6 +85,18 @@ static struct lockdep_map console_lock_dep_map = { #endif /* + * Number of registered extended console drivers. + * + * If extended consoles are present, in-kernel cont reassembly is disabled + * and each fragment is stored as a separate log entry with proper + * continuation flag so that every emitted message has full metadata. This + * doesn't change the result for regular consoles or /proc/kmsg. For + * /dev/kmsg, as long as the reader concatenates messages according to + * consecutive continuation flags, the end result should be the same too. + */ +static int nr_ext_console_drivers; + +/* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. */ @@ -195,7 +207,7 @@ static int console_may_schedule; * need to be changed in the future, when the requirements change. * * /dev/kmsg exports the structured data in the following line format: - * "level,sequnum,timestamp;\n" + * ",,,;\n" * * The optional key/value pairs are attached as continuation lines starting * with a space character and terminated by a newline. All possible @@ -1417,7 +1429,9 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. */ -static void call_console_drivers(int level, const char *text, size_t len) +static void call_console_drivers(int level, + const char *ext_text, size_t ext_len, + const char *text, size_t len) { struct console *con; @@ -1438,7 +1452,10 @@ static void call_console_drivers(int level, const char *text, size_t len) if (!cpu_online(smp_processor_id()) && !(con->flags & CON_ANYTIME)) continue; - con->write(con, text, len); + if (con->flags & CON_EXTENDED) + con->write(con, ext_text, ext_len); + else + con->write(con, text, len); } } @@ -1581,8 +1598,12 @@ static bool cont_add(int facility, int level, const char *text, size_t len) if (cont.len && cont.flushed) return false; - if (cont.len + len > sizeof(cont.buf)) { - /* the line gets too long, split it up in separate records */ + /* + * If ext consoles are present, flush and skip in-kernel + * continuation. See nr_ext_console_drivers definition. Also, if + * the line gets too long, split it up in separate records. + */ + if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) { cont_flush(LOG_CONT); return false; } @@ -1917,9 +1938,19 @@ static struct cont { u8 level; bool flushed:1; } cont; +static char *log_text(const struct printk_log *msg) { return NULL; } +static char *log_dict(const struct printk_log *msg) { return NULL; } static struct printk_log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } -static void call_console_drivers(int level, const char *text, size_t len) {} +static ssize_t msg_print_ext_header(char *buf, size_t size, + struct printk_log *msg, u64 seq, + enum log_flags prev_flags) { return 0; } +static ssize_t msg_print_ext_body(char *buf, size_t size, + char *dict, size_t dict_len, + char *text, size_t text_len) { return 0; } +static void call_console_drivers(int level, + const char *ext_text, size_t ext_len, + const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { return 0; } static size_t cont_print_text(char *text, size_t size) { return 0; } @@ -2172,7 +2203,7 @@ static void console_cont_flush(char *text, size_t size) len = cont_print_text(text, size); raw_spin_unlock(&logbuf_lock); stop_critical_timings(); - call_console_drivers(cont.level, text, len); + call_console_drivers(cont.level, NULL, 0, text, len); start_critical_timings(); local_irq_restore(flags); return; @@ -2196,6 +2227,7 @@ out: */ void console_unlock(void) { + static char ext_text[CONSOLE_EXT_LOG_MAX]; static char text[LOG_LINE_MAX + PREFIX_MAX]; static u64 seen_seq; unsigned long flags; @@ -2214,6 +2246,7 @@ void console_unlock(void) again: for (;;) { struct printk_log *msg; + size_t ext_len = 0; size_t len; int level; @@ -2259,13 +2292,22 @@ skip: level = msg->level; len += msg_print_text(msg, console_prev, false, text + len, sizeof(text) - len); + if (nr_ext_console_drivers) { + ext_len = msg_print_ext_header(ext_text, + sizeof(ext_text), + msg, console_seq, console_prev); + ext_len += msg_print_ext_body(ext_text + ext_len, + sizeof(ext_text) - ext_len, + log_dict(msg), msg->dict_len, + log_text(msg), msg->text_len); + } console_idx = log_next(console_idx); console_seq++; console_prev = msg->flags; raw_spin_unlock(&logbuf_lock); stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(level, text, len); + call_console_drivers(level, ext_text, ext_len, text, len); start_critical_timings(); local_irq_restore(flags); } @@ -2521,6 +2563,11 @@ void register_console(struct console *newcon) newcon->next = console_drivers->next; console_drivers->next = newcon; } + + if (newcon->flags & CON_EXTENDED) + if (!nr_ext_console_drivers++) + pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); + if (newcon->flags & CON_PRINTBUFFER) { /* * console_unlock(); will print out the buffered messages @@ -2593,6 +2640,9 @@ int unregister_console(struct console *console) } } + if (!res && (console->flags & CON_EXTENDED)) + nr_ext_console_drivers--; + /* * If this isn't the last console and it has CON_CONSDEV set, we * need to set it on the next preferred console. -- cgit v1.1 From a6d403ac96893cb5c5812c2d75de082795f02e5f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:33 -0700 Subject: netconsole: remove unnecessary netconsole_target_get/out() from write_msg() write_msg() grabs target_list_lock and walks target_list invoking netpool_send_udp() on each target. Curiously, it protects each iteration with netconsole_target_get/put() even though it never releases target_list_lock which protects all the members. While this doesn't harm anything, it doesn't serve any purpose either. The items on the list can't go away while target_list_lock is held. Remove the unnecessary get/put pair. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Cc: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/netconsole.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 15731d1..30c0524 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -744,7 +744,6 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { - netconsole_target_get(nt); if (nt->enabled && netif_running(nt->np.dev)) { /* * We nest this inside the for-each-target loop above @@ -760,7 +759,6 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) left -= frag; } } - netconsole_target_put(nt); } spin_unlock_irqrestore(&target_list_lock, flags); } -- cgit v1.1 From 698cf1c6163a2f2752d2d3e511a26c688f5778f3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:36 -0700 Subject: netconsole: make netconsole_target->enabled a bool netconsole uses both bool and int for boolean values. Let's convert nt->enabled to bool for consistency. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Cc: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/netconsole.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 30c0524..8dd1e55 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -104,7 +104,7 @@ struct netconsole_target { #ifdef CONFIG_NETCONSOLE_DYNAMIC struct config_item item; #endif - int enabled; + bool enabled; struct mutex mutex; struct netpoll np; }; @@ -197,7 +197,7 @@ static struct netconsole_target *alloc_param_target(char *target_config) if (err) goto fail; - nt->enabled = 1; + nt->enabled = true; return nt; @@ -322,13 +322,13 @@ static ssize_t store_enabled(struct netconsole_target *nt, return err; if (enabled < 0 || enabled > 1) return -EINVAL; - if (enabled == nt->enabled) { + if ((bool)enabled == nt->enabled) { pr_info("network logging has already %s\n", nt->enabled ? "started" : "stopped"); return -EINVAL; } - if (enabled) { /* 1 */ + if (enabled) { /* true */ /* * Skip netpoll_parse_options() -- all the attributes are * already configured via configfs. Just print them out. @@ -340,13 +340,13 @@ static ssize_t store_enabled(struct netconsole_target *nt, return err; pr_info("netconsole: network logging started\n"); - } else { /* 0 */ + } else { /* false */ /* We need to disable the netconsole before cleaning it up * otherwise we might end up in write_msg() with - * nt->np.dev == NULL and nt->enabled == 1 + * nt->np.dev == NULL and nt->enabled == true */ spin_lock_irqsave(&target_list_lock, flags); - nt->enabled = 0; + nt->enabled = false; spin_unlock_irqrestore(&target_list_lock, flags); netpoll_cleanup(&nt->np); } @@ -594,7 +594,7 @@ static struct config_item *make_netconsole_target(struct config_group *group, /* * Allocate and initialize with defaults. - * Target is disabled at creation (enabled == 0). + * Target is disabled at creation (!enabled). */ nt = kzalloc(sizeof(*nt), GFP_KERNEL); if (!nt) @@ -695,7 +695,7 @@ restart: spin_lock_irqsave(&target_list_lock, flags); dev_put(nt->np.dev); nt->np.dev = NULL; - nt->enabled = 0; + nt->enabled = false; stopped = true; netconsole_target_put(nt); goto restart; -- cgit v1.1 From 369e5a888179f31dcd7cc8e02d695e6481a66075 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:38 -0700 Subject: netconsole: make all dynamic netconsoles share a mutex Currently, each dynamic netconsole_target uses its own separate mutex to synchronize the configuration operations. This patch replaces the per-netconsole_target mutexes with a single mutex - dynamic_netconsole_mutex. The reduced granularity doesn't hurt anything, the code is minutely simpler and this'd allow adding operations which should be synchronized across all dynamic netconsoles. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Cc: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/netconsole.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 8dd1e55..9b0c81e 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -105,13 +105,13 @@ struct netconsole_target { struct config_item item; #endif bool enabled; - struct mutex mutex; struct netpoll np; }; #ifdef CONFIG_NETCONSOLE_DYNAMIC static struct configfs_subsystem netconsole_subsys; +static DEFINE_MUTEX(dynamic_netconsole_mutex); static int __init dynamic_netconsole_init(void) { @@ -185,7 +185,6 @@ static struct netconsole_target *alloc_param_target(char *target_config) strlcpy(nt->np.dev_name, "eth0", IFNAMSIZ); nt->np.local_port = 6665; nt->np.remote_port = 6666; - mutex_init(&nt->mutex); eth_broadcast_addr(nt->np.remote_mac); /* Parse parameters and setup netpoll */ @@ -562,10 +561,10 @@ static ssize_t netconsole_target_attr_store(struct config_item *item, struct netconsole_target_attr *na = container_of(attr, struct netconsole_target_attr, attr); - mutex_lock(&nt->mutex); + mutex_lock(&dynamic_netconsole_mutex); if (na->store) ret = na->store(nt, buf, count); - mutex_unlock(&nt->mutex); + mutex_unlock(&dynamic_netconsole_mutex); return ret; } @@ -604,7 +603,6 @@ static struct config_item *make_netconsole_target(struct config_group *group, strlcpy(nt->np.dev_name, "eth0", IFNAMSIZ); nt->np.local_port = 6665; nt->np.remote_port = 6666; - mutex_init(&nt->mutex); eth_broadcast_addr(nt->np.remote_mac); /* Initialize the config_item member */ -- cgit v1.1 From e2f15f9a79201ddd596727b84a85c419ee57ad5c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 Jun 2015 15:01:41 -0700 Subject: netconsole: implement extended console support printk logbuf keeps various metadata and optional key=value dictionary for structured messages, both of which are stripped when messages are handed to regular console drivers. It can be useful to have this metadata and dictionary available to netconsole consumers. This obviously makes logging via netconsole more complete and the sequence number in particular is useful in environments where messages may be lost or reordered in transit - e.g. when netconsole is used to collect messages in a large cluster where packets may have to travel congested hops to reach the aggregator. The lost and reordered messages can easily be identified and handled accordingly using the sequence numbers. printk recently added extended console support which can be selected by setting CON_EXTENDED flag. From console driver side, not much changes. The only difference is that the text passed to the write callback is formatted the same way as /dev/kmsg. This patch implements extended console support for netconsole which can be enabled by either prepending "+" to a netconsole boot param entry or echoing 1 to "extended" file in configfs. When enabled, netconsole transmits extended log messages with headers identical to /dev/kmsg output. There's one complication due to message fragments. netconsole limits the maximum message size to 1k and messages longer than that are split into multiple fragments. As all extended console messages should carry matching headers and be uniquely identifiable, each extended message fragment carries full copy of the metadata and an extra header field to identify the specific fragment. The optional header is of the form "ncfrag=OFF/LEN" where OFF is the byte offset into the message body and LEN is the total length. To avoid unnecessarily making printk format extended messages, Extended netconsole is registered with printk when the first extended netconsole is configured. Signed-off-by: Tejun Heo Cc: David Miller Cc: Kay Sievers Cc: Petr Mladek Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/networking/netconsole.txt | 35 +++++++- drivers/net/netconsole.c | 141 +++++++++++++++++++++++++++++++- 2 files changed, 173 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/netconsole.txt b/Documentation/networking/netconsole.txt index a5d574a..30409a3 100644 --- a/Documentation/networking/netconsole.txt +++ b/Documentation/networking/netconsole.txt @@ -2,6 +2,7 @@ started by Ingo Molnar , 2001.09.17 2.6 port and netpoll api by Matt Mackall , Sep 9 2003 IPv6 support by Cong Wang , Jan 1 2013 +Extended console support by Tejun Heo , May 1 2015 Please send bug reports to Matt Mackall Satyam Sharma , and Cong Wang @@ -24,9 +25,10 @@ Sender and receiver configuration: It takes a string configuration parameter "netconsole" in the following format: - netconsole=[src-port]@[src-ip]/[],[tgt-port]@/[tgt-macaddr] + netconsole=[+][src-port]@[src-ip]/[],[tgt-port]@/[tgt-macaddr] where + + if present, enable extended console support src-port source for UDP packets (defaults to 6665) src-ip source IP to use (interface address) dev network interface (eth0) @@ -107,6 +109,7 @@ To remove a target: The interface exposes these parameters of a netconsole target to userspace: enabled Is this target currently enabled? (read-write) + extended Extended mode enabled (read-write) dev_name Local network interface name (read-write) local_port Source UDP port to use (read-write) remote_port Remote agent's UDP port (read-write) @@ -132,6 +135,36 @@ You can also update the local interface dynamically. This is especially useful if you want to use interfaces that have newly come up (and may not have existed when netconsole was loaded / initialized). +Extended console: +================= + +If '+' is prefixed to the configuration line or "extended" config file +is set to 1, extended console support is enabled. An example boot +param follows. + + linux netconsole=+4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc + +Log messages are transmitted with extended metadata header in the +following format which is the same as /dev/kmsg. + + ,,,; + +Non printable characters in are escaped using "\xff" +notation. If the message contains optional dictionary, verbatim +newline is used as the delimeter. + +If a message doesn't fit in certain number of bytes (currently 1000), +the message is split into multiple fragments by netconsole. These +fragments are transmitted with "ncfrag" header field added. + + ncfrag=/ + +For example, assuming a lot smaller chunk size, a message "the first +chunk, the 2nd chunk." may be split as follows. + + 6,416,1758426,-,ncfrag=0/31;the first chunk, + 6,416,1758426,-,ncfrag=16/31; the 2nd chunk. + Miscellaneous notes: ==================== diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 9b0c81e..97f3acd 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -79,6 +79,12 @@ static LIST_HEAD(target_list); /* This needs to be a spinlock because write_msg() cannot sleep */ static DEFINE_SPINLOCK(target_list_lock); +/* + * Console driver for extended netconsoles. Registered on the first use to + * avoid unnecessarily enabling ext message formatting. + */ +static struct console netconsole_ext; + /** * struct netconsole_target - Represents a configured netconsole target. * @list: Links this target into the target_list. @@ -105,6 +111,7 @@ struct netconsole_target { struct config_item item; #endif bool enabled; + bool extended; struct netpoll np; }; @@ -187,6 +194,11 @@ static struct netconsole_target *alloc_param_target(char *target_config) nt->np.remote_port = 6666; eth_broadcast_addr(nt->np.remote_mac); + if (*target_config == '+') { + nt->extended = true; + target_config++; + } + /* Parse parameters and setup netpoll */ err = netpoll_parse_options(&nt->np, target_config); if (err) @@ -257,6 +269,11 @@ static ssize_t show_enabled(struct netconsole_target *nt, char *buf) return snprintf(buf, PAGE_SIZE, "%d\n", nt->enabled); } +static ssize_t show_extended(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", nt->extended); +} + static ssize_t show_dev_name(struct netconsole_target *nt, char *buf) { return snprintf(buf, PAGE_SIZE, "%s\n", nt->np.dev_name); @@ -328,6 +345,11 @@ static ssize_t store_enabled(struct netconsole_target *nt, } if (enabled) { /* true */ + if (nt->extended && !(netconsole_ext.flags & CON_ENABLED)) { + netconsole_ext.flags |= CON_ENABLED; + register_console(&netconsole_ext); + } + /* * Skip netpoll_parse_options() -- all the attributes are * already configured via configfs. Just print them out. @@ -355,6 +377,30 @@ static ssize_t store_enabled(struct netconsole_target *nt, return strnlen(buf, count); } +static ssize_t store_extended(struct netconsole_target *nt, + const char *buf, + size_t count) +{ + int extended; + int err; + + if (nt->enabled) { + pr_err("target (%s) is enabled, disable to update parameters\n", + config_item_name(&nt->item)); + return -EINVAL; + } + + err = kstrtoint(buf, 10, &extended); + if (err < 0) + return err; + if (extended < 0 || extended > 1) + return -EINVAL; + + nt->extended = extended; + + return strnlen(buf, count); +} + static ssize_t store_dev_name(struct netconsole_target *nt, const char *buf, size_t count) @@ -507,6 +553,7 @@ static struct netconsole_target_attr netconsole_target_##_name = \ __CONFIGFS_ATTR(_name, S_IRUGO | S_IWUSR, show_##_name, store_##_name) NETCONSOLE_TARGET_ATTR_RW(enabled); +NETCONSOLE_TARGET_ATTR_RW(extended); NETCONSOLE_TARGET_ATTR_RW(dev_name); NETCONSOLE_TARGET_ATTR_RW(local_port); NETCONSOLE_TARGET_ATTR_RW(remote_port); @@ -517,6 +564,7 @@ NETCONSOLE_TARGET_ATTR_RW(remote_mac); static struct configfs_attribute *netconsole_target_attrs[] = { &netconsole_target_enabled.attr, + &netconsole_target_extended.attr, &netconsole_target_dev_name.attr, &netconsole_target_local_port.attr, &netconsole_target_remote_port.attr, @@ -727,6 +775,82 @@ static struct notifier_block netconsole_netdev_notifier = { .notifier_call = netconsole_netdev_event, }; +/** + * send_ext_msg_udp - send extended log message to target + * @nt: target to send message to + * @msg: extended log message to send + * @msg_len: length of message + * + * Transfer extended log @msg to @nt. If @msg is longer than + * MAX_PRINT_CHUNK, it'll be split and transmitted in multiple chunks with + * ncfrag header field added to identify them. + */ +static void send_ext_msg_udp(struct netconsole_target *nt, const char *msg, + int msg_len) +{ + static char buf[MAX_PRINT_CHUNK]; /* protected by target_list_lock */ + const char *header, *body; + int offset = 0; + int header_len, body_len; + + if (msg_len <= MAX_PRINT_CHUNK) { + netpoll_send_udp(&nt->np, msg, msg_len); + return; + } + + /* need to insert extra header fields, detect header and body */ + header = msg; + body = memchr(msg, ';', msg_len); + if (WARN_ON_ONCE(!body)) + return; + + header_len = body - header; + body_len = msg_len - header_len - 1; + body++; + + /* + * Transfer multiple chunks with the following extra header. + * "ncfrag=/" + */ + memcpy(buf, header, header_len); + + while (offset < body_len) { + int this_header = header_len; + int this_chunk; + + this_header += scnprintf(buf + this_header, + sizeof(buf) - this_header, + ",ncfrag=%d/%d;", offset, body_len); + + this_chunk = min(body_len - offset, + MAX_PRINT_CHUNK - this_header); + if (WARN_ON_ONCE(this_chunk <= 0)) + return; + + memcpy(buf + this_header, body + offset, this_chunk); + + netpoll_send_udp(&nt->np, buf, this_header + this_chunk); + + offset += this_chunk; + } +} + +static void write_ext_msg(struct console *con, const char *msg, + unsigned int len) +{ + struct netconsole_target *nt; + unsigned long flags; + + if ((oops_only && !oops_in_progress) || list_empty(&target_list)) + return; + + spin_lock_irqsave(&target_list_lock, flags); + list_for_each_entry(nt, &target_list, list) + if (nt->extended && nt->enabled && netif_running(nt->np.dev)) + send_ext_msg_udp(nt, msg, len); + spin_unlock_irqrestore(&target_list_lock, flags); +} + static void write_msg(struct console *con, const char *msg, unsigned int len) { int frag, left; @@ -742,7 +866,7 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) spin_lock_irqsave(&target_list_lock, flags); list_for_each_entry(nt, &target_list, list) { - if (nt->enabled && netif_running(nt->np.dev)) { + if (!nt->extended && nt->enabled && netif_running(nt->np.dev)) { /* * We nest this inside the for-each-target loop above * so that we're able to get as much logging out to @@ -761,6 +885,12 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) spin_unlock_irqrestore(&target_list_lock, flags); } +static struct console netconsole_ext = { + .name = "netcon_ext", + .flags = CON_EXTENDED, /* starts disabled, registered on first use */ + .write = write_ext_msg, +}; + static struct console netconsole = { .name = "netcon", .flags = CON_ENABLED, @@ -783,7 +913,11 @@ static int __init init_netconsole(void) goto fail; } /* Dump existing printks when we register */ - netconsole.flags |= CON_PRINTBUFFER; + if (nt->extended) + netconsole_ext.flags |= CON_PRINTBUFFER | + CON_ENABLED; + else + netconsole.flags |= CON_PRINTBUFFER; spin_lock_irqsave(&target_list_lock, flags); list_add(&nt->list, &target_list); @@ -799,6 +933,8 @@ static int __init init_netconsole(void) if (err) goto undonotifier; + if (netconsole_ext.flags & CON_ENABLED) + register_console(&netconsole_ext); register_console(&netconsole); pr_info("network logging started\n"); @@ -827,6 +963,7 @@ static void __exit cleanup_netconsole(void) { struct netconsole_target *nt, *tmp; + unregister_console(&netconsole_ext); unregister_console(&netconsole); dynamic_netconsole_exit(); unregister_netdevice_notifier(&netconsole_netdev_notifier); -- cgit v1.1 From d194e5d666225b04c7754471df0948f645b6ab3a Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 25 Jun 2015 15:01:44 -0700 Subject: security_syslog() should be called once only The final version of commit 637241a900cb ("kmsg: honor dmesg_restrict sysctl on /dev/kmsg") lost few hooks, as result security_syslog() are processed incorrectly: - open of /dev/kmsg checks syslog access permissions by using check_syslog_permissions() where security_syslog() is not called if dmesg_restrict is set. - syslog syscall and /proc/kmsg calls do_syslog() where security_syslog can be executed twice (inside check_syslog_permissions() and then directly in do_syslog()) With this patch security_syslog() is called once only in all syslog-related operations regardless of dmesg_restrict value. Fixes: 637241a900cb ("kmsg: honor dmesg_restrict sysctl on /dev/kmsg") Signed-off-by: Vasily Averin Cc: Kees Cook Cc: Josh Boyer Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ae980dc..45fa8c8 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -496,11 +496,11 @@ int check_syslog_permissions(int type, bool from_file) * already done the capabilities checks at open time. */ if (from_file && type != SYSLOG_ACTION_OPEN) - return 0; + goto ok; if (syslog_action_restricted(type)) { if (capable(CAP_SYSLOG)) - return 0; + goto ok; /* * For historical reasons, accept CAP_SYS_ADMIN too, with * a warning. @@ -510,10 +510,11 @@ int check_syslog_permissions(int type, bool from_file) "CAP_SYS_ADMIN but no CAP_SYSLOG " "(deprecated).\n", current->comm, task_pid_nr(current)); - return 0; + goto ok; } return -EPERM; } +ok: return security_syslog(type); } @@ -1299,10 +1300,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) if (error) goto out; - error = security_syslog(type); - if (error) - return error; - switch (type) { case SYSLOG_ACTION_CLOSE: /* Close log */ break; -- cgit v1.1 From 3ea4331c60be3eee4c97e5ddabad95399f879b76 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 25 Jun 2015 15:01:47 -0700 Subject: check_syslog_permissions() cleanup Patch fixes drawbacks in heck_syslog_permissions() noticed by AKPM: "from_file handling makes me cry. That's not a boolean - it's an enumerated value with two values currently defined. But the code in check_syslog_permissions() treats it as a boolean and also hardwires the knowledge that SYSLOG_FROM_PROC == 1 (or == `true`). And the name is wrong: it should be called from_proc to match SYSLOG_FROM_PROC." Signed-off-by: Vasily Averin Cc: Kees Cook Cc: Josh Boyer Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syslog.h | 6 +++--- kernel/printk/printk.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/syslog.h b/include/linux/syslog.h index 4b7b875..c3a7f0c 100644 --- a/include/linux/syslog.h +++ b/include/linux/syslog.h @@ -47,12 +47,12 @@ #define SYSLOG_FROM_READER 0 #define SYSLOG_FROM_PROC 1 -int do_syslog(int type, char __user *buf, int count, bool from_file); +int do_syslog(int type, char __user *buf, int count, int source); #ifdef CONFIG_PRINTK -int check_syslog_permissions(int type, bool from_file); +int check_syslog_permissions(int type, int source); #else -static inline int check_syslog_permissions(int type, bool from_file) +static inline int check_syslog_permissions(int type, int source) { return 0; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 45fa8c8..de55384 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -489,13 +489,13 @@ static int syslog_action_restricted(int type) type != SYSLOG_ACTION_SIZE_BUFFER; } -int check_syslog_permissions(int type, bool from_file) +int check_syslog_permissions(int type, int source) { /* * If this is from /proc/kmsg and we've already opened it, then we've * already done the capabilities checks at open time. */ - if (from_file && type != SYSLOG_ACTION_OPEN) + if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) goto ok; if (syslog_action_restricted(type)) { @@ -1290,13 +1290,13 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return len; } -int do_syslog(int type, char __user *buf, int len, bool from_file) +int do_syslog(int type, char __user *buf, int len, int source) { bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; - error = check_syslog_permissions(type, from_file); + error = check_syslog_permissions(type, source); if (error) goto out; @@ -1379,7 +1379,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) syslog_prev = 0; syslog_partial = 0; } - if (from_file) { + if (source == SYSLOG_FROM_PROC) { /* * Short-cut for poll(/"proc/kmsg") which simply checks * for pending data, not the size; return the count of -- cgit v1.1 From 435de0782b658c993350049e853ea9a8795df4e2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:50 -0700 Subject: get_maintainer.pl: add .get_maintainer.ignore file capability Some people prefer not to be cc'd on patches. Add an ability to have a file (.get_maintainer.ignore) with names and email addresses that are excluded from being listed except when specifically listed as a maintainer in a section. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index d701627..fc169fd 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -186,6 +186,27 @@ if (-f $conf) { unshift(@ARGV, @conf_args) if @conf_args; } +my @ignore_emails = (); +my $ignore_file = which_conf(".get_maintainer.ignore"); +if (-f $ignore_file) { + open(my $ignore, '<', "$ignore_file") + or warn "$P: Can't find a readable .get_maintainer.ignore file $!\n"; + while (<$ignore>) { + my $line = $_; + + $line =~ s/\s*\n?$//; + $line =~ s/^\s*//; + $line =~ s/\s+$//; + $line =~ s/#.*$//; + + next if ($line =~ m/^\s*$/); + if (rfc822_valid($line)) { + push(@ignore_emails, $line); + } + } + close($ignore); +} + if (!GetOptions( 'email!' => \$email, 'git!' => \$email_git, @@ -513,6 +534,16 @@ if ($web) { exit($exit); +sub ignore_email_address { + my ($address) = @_; + + foreach my $ignore (@ignore_emails) { + return 1 if ($ignore eq $address); + } + + return 0; +} + sub range_is_maintained { my ($start, $end) = @_; @@ -1868,6 +1899,7 @@ sub vcs_assign { my $percent = $sign_offs * 100 / $divisor; $percent = 100 if ($percent > 100); + next if (ignore_email_address($line)); $count++; last if ($sign_offs < $email_git_min_signatures || $count > $email_git_max_maintainers || -- cgit v1.1 From 364f68dc996a63b7e54dd8b9624f64a76f43dd12 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:52 -0700 Subject: get_maintainer: emit longer section headers Section headers can be quite long and some are very long and duplicated for many initial characters. The current maximum length emitted for a section header is 20 bytes (or 17 bytes then ... when the section header length is > 20). Change that length to 50 so more of the section is shown. Example new output: $ ./scripts/get_maintainer.pl -f drivers/net/ethernet/broadcom/bnx2x/ Ariel Elior (supporter:BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER) netdev@vger.kernel.org (open list:BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER) linux-kernel@vger.kernel.org (open list) Old: $ ./scripts/get_maintainer.pl -f drivers/net/ethernet/broadcom/bnx2x/ Ariel Elior (supporter:BROADCOM BNX2X 10...) netdev@vger.kernel.org (open list:BROADCOM BNX2X 10...) linux-kernel@vger.kernel.org (open list) Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index fc169fd..cffa658 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -42,6 +42,7 @@ my $output_multiline = 1; my $output_separator = ", "; my $output_roles = 0; my $output_rolestats = 1; +my $output_section_maxlen = 50; my $scm = 0; my $web = 0; my $subsystem = 0; @@ -978,8 +979,8 @@ sub get_maintainer_role { my $role = "unknown"; my $subsystem = $typevalue[$start]; - if (length($subsystem) > 20) { - $subsystem = substr($subsystem, 0, 17); + if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) { + $subsystem = substr($subsystem, 0, $output_section_maxlen - 3); $subsystem =~ s/\s*$//; $subsystem = $subsystem . "..."; } @@ -1021,8 +1022,8 @@ sub get_list_role { my $end = find_ending_index($index); my $subsystem = $typevalue[$start]; - if (length($subsystem) > 20) { - $subsystem = substr($subsystem, 0, 17); + if ($output_section_maxlen && length($subsystem) > $output_section_maxlen) { + $subsystem = substr($subsystem, 0, $output_section_maxlen - 3); $subsystem =~ s/\s*$//; $subsystem = $subsystem . "..."; } -- cgit v1.1 From ce8155f7a3d59ce868ea16d8891edda4d865e873 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:01:55 -0700 Subject: get_maintainer: fix perl 5.22/5.24 deprecated/incompatible "\C" use Perl 5.22 emits a deprecated message when "\C" is used in a regex. Perl 5.24 will disallow it altogether. Fix it by using [A-Z] instead of \C. Signed-off-by: Joe Perches Reported-by: Valdis Kletnieks Tested-by: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index cffa658..98bae86 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -305,7 +305,7 @@ open (my $maint, '<', "${lk_path}MAINTAINERS") while (<$maint>) { my $line = $_; - if ($line =~ m/^(\C):\s*(.*)/) { + if ($line =~ m/^([A-Z]):\s*(.*)/) { my $type = $1; my $value = $2; @@ -550,7 +550,7 @@ sub range_is_maintained { for (my $i = $start; $i < $end; $i++) { my $line = $typevalue[$i]; - if ($line =~ m/^(\C):\s*(.*)/) { + if ($line =~ m/^([A-Z]):\s*(.*)/) { my $type = $1; my $value = $2; if ($type eq 'S') { @@ -568,7 +568,7 @@ sub range_has_maintainer { for (my $i = $start; $i < $end; $i++) { my $line = $typevalue[$i]; - if ($line =~ m/^(\C):\s*(.*)/) { + if ($line =~ m/^([A-Z]):\s*(.*)/) { my $type = $1; my $value = $2; if ($type eq 'M') { @@ -617,7 +617,7 @@ sub get_maintainers { for ($i = $start; $i < $end; $i++) { my $line = $typevalue[$i]; - if ($line =~ m/^(\C):\s*(.*)/) { + if ($line =~ m/^([A-Z]):\s*(.*)/) { my $type = $1; my $value = $2; if ($type eq 'X') { @@ -632,7 +632,7 @@ sub get_maintainers { if (!$exclude) { for ($i = $start; $i < $end; $i++) { my $line = $typevalue[$i]; - if ($line =~ m/^(\C):\s*(.*)/) { + if ($line =~ m/^([A-Z]):\s*(.*)/) { my $type = $1; my $value = $2; if ($type eq 'F') { @@ -933,7 +933,7 @@ sub find_first_section { while ($index < @typevalue) { my $tv = $typevalue[$index]; - if (($tv =~ m/^(\C):\s*(.*)/)) { + if (($tv =~ m/^([A-Z]):\s*(.*)/)) { last; } $index++; @@ -947,7 +947,7 @@ sub find_starting_index { while ($index > 0) { my $tv = $typevalue[$index]; - if (!($tv =~ m/^(\C):\s*(.*)/)) { + if (!($tv =~ m/^([A-Z]):\s*(.*)/)) { last; } $index--; @@ -961,7 +961,7 @@ sub find_ending_index { while ($index < @typevalue) { my $tv = $typevalue[$index]; - if (!($tv =~ m/^(\C):\s*(.*)/)) { + if (!($tv =~ m/^([A-Z]):\s*(.*)/)) { last; } $index++; @@ -987,7 +987,7 @@ sub get_maintainer_role { for ($i = $start + 1; $i < $end; $i++) { my $tv = $typevalue[$i]; - if ($tv =~ m/^(\C):\s*(.*)/) { + if ($tv =~ m/^([A-Z]):\s*(.*)/) { my $ptype = $1; my $pvalue = $2; if ($ptype eq "S") { @@ -1046,7 +1046,7 @@ sub add_categories { for ($i = $start + 1; $i < $end; $i++) { my $tv = $typevalue[$i]; - if ($tv =~ m/^(\C):\s*(.*)/) { + if ($tv =~ m/^([A-Z]):\s*(.*)/) { my $ptype = $1; my $pvalue = $2; if ($ptype eq "L") { @@ -1088,7 +1088,7 @@ sub add_categories { if ($name eq "") { if ($i > 0) { my $tv = $typevalue[$i - 1]; - if ($tv =~ m/^(\C):\s*(.*)/) { + if ($tv =~ m/^([A-Z]):\s*(.*)/) { if ($1 eq "P") { $name = $2; $pvalue = format_email($name, $address, $email_usename); @@ -1105,7 +1105,7 @@ sub add_categories { if ($name eq "") { if ($i > 0) { my $tv = $typevalue[$i - 1]; - if ($tv =~ m/^(\C):\s*(.*)/) { + if ($tv =~ m/^([A-Z]):\s*(.*)/) { if ($1 eq "P") { $name = $2; $pvalue = format_email($name, $address, $email_usename); -- cgit v1.1 From e5747e4016e763fe49346e2c95aa55989d02ff21 Mon Sep 17 00:00:00 2001 From: Jim Davis Date: Thu, 25 Jun 2015 15:01:58 -0700 Subject: MAINTAINERS: alsa-devel@alsa-project.org is moderated for non-subscribers Fix a few inconsistent annotations to show that the alsa-devel mailing list is moderated for non-subscribers. Signed-off-by: Jim Davis Cc: Joe Perches Cc: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 246d9d8..c284c8e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5178,7 +5178,7 @@ K: \b(ABS|SYN)_MT_ INTEL ASoC BDW/HSW DRIVERS M: Jie Yang -L: alsa-devel@alsa-project.org +L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/intel/sst-haswell* F: sound/soc/intel/sst-dsp* @@ -6761,7 +6761,7 @@ F: drivers/net/ethernet/natsemi/natsemi.c NATIVE INSTRUMENTS USB SOUND INTERFACE DRIVER M: Daniel Mack S: Maintained -L: alsa-devel@alsa-project.org +L: alsa-devel@alsa-project.org (moderated for non-subscribers) W: http://www.native-instruments.com F: sound/usb/caiaq/ @@ -7179,7 +7179,7 @@ F: arch/arm/mach-omap2/prm* OMAP AUDIO SUPPORT M: Peter Ujfalusi M: Jarkko Nikula -L: alsa-devel@alsa-project.org (subscribers-only) +L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: linux-omap@vger.kernel.org S: Maintained F: sound/soc/omap/ -- cgit v1.1 From 9c3646d1c6661d6411d4111f8c1ff940f9c4b4d1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:02:00 -0700 Subject: MAINTAINERS: add quotation marks around names with periods This makes it easier to copy/paste names with periods to email clients. All the other names with periods already have quotation marks. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c284c8e..9b49727 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -259,7 +259,7 @@ S: Maintained F: drivers/platform/x86/acer-wmi.c ACPI -M: Rafael J. Wysocki +M: "Rafael J. Wysocki" M: Len Brown L: linux-acpi@vger.kernel.org W: https://01.org/linux-acpi @@ -280,7 +280,7 @@ F: tools/power/acpi/ ACPI COMPONENT ARCHITECTURE (ACPICA) M: Robert Moore M: Lv Zheng -M: Rafael J. Wysocki +M: "Rafael J. Wysocki" L: linux-acpi@vger.kernel.org L: devel@acpica.org W: https://acpica.org/ @@ -2804,7 +2804,7 @@ S: Maintained F: drivers/net/ethernet/ti/cpmac.c CPU FREQUENCY DRIVERS -M: Rafael J. Wysocki +M: "Rafael J. Wysocki" M: Viresh Kumar L: linux-pm@vger.kernel.org S: Maintained @@ -2843,7 +2843,7 @@ F: drivers/cpuidle/cpuidle-exynos.c F: arch/arm/mach-exynos/pm.c CPUIDLE DRIVERS -M: Rafael J. Wysocki +M: "Rafael J. Wysocki" M: Daniel Lezcano L: linux-pm@vger.kernel.org S: Maintained @@ -4059,7 +4059,7 @@ F: include/uapi/scsi/fc/ FILE LOCKING (flock() and fcntl()/lockf()) M: Jeff Layton -M: J. Bruce Fields +M: "J. Bruce Fields" L: linux-fsdevel@vger.kernel.org S: Maintained F: include/linux/fcntl.h @@ -4255,7 +4255,7 @@ F: sound/soc/fsl/imx* F: sound/soc/fsl/mpc8610_hpcd.c FREESCALE QORIQ MANAGEMENT COMPLEX DRIVER -M: J. German Rivera +M: "J. German Rivera" L: linux-kernel@vger.kernel.org S: Maintained F: drivers/staging/fsl-mc/ @@ -4816,7 +4816,7 @@ S: Maintained F: fs/hugetlbfs/ Hyper-V CORE AND DRIVERS -M: K. Y. Srinivasan +M: "K. Y. Srinivasan" M: Haiyang Zhang L: devel@linuxdriverproject.org S: Maintained @@ -7881,7 +7881,7 @@ F: include/linux/power_supply.h F: drivers/power/ PNP SUPPORT -M: Rafael J. Wysocki +M: "Rafael J. Wysocki" S: Maintained F: drivers/pnp/ @@ -8885,7 +8885,7 @@ F: drivers/mmc/host/sdhci-spear.c SECURITY SUBSYSTEM M: James Morris -M: Serge E. Hallyn +M: "Serge E. Hallyn" L: linux-security-module@vger.kernel.org (suggested Cc:) T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git W: http://kernsec.org/ @@ -9689,7 +9689,7 @@ F: fs/sysv/ F: include/linux/sysv_fs.h TARGET SUBSYSTEM -M: Nicholas A. Bellinger +M: "Nicholas A. Bellinger" L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org W: http://www.linux-iscsi.org @@ -9831,7 +9831,7 @@ F: include/linux/if_team.h F: include/uapi/linux/if_team.h TECHNOLOGIC SYSTEMS TS-5500 PLATFORM SUPPORT -M: Savoir-faire Linux Inc. +M: "Savoir-faire Linux Inc." S: Maintained F: arch/x86/platform/ts5500/ -- cgit v1.1 From e43cdb56f33d9335c8d49a2adfed4088f0e8b189 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:02:03 -0700 Subject: MAINTAINERS: Add quotation marks around names with commas This makes it easier to copy/paste names with periods to email clients. All the other names with commas already have quotation marks. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9b49727..d599077 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2498,7 +2498,7 @@ F: arch/powerpc/oprofile/*cell* F: arch/powerpc/platforms/cell/ CEPH DISTRIBUTED FILE SYSTEM CLIENT -M: Yan, Zheng +M: "Yan, Zheng" M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ @@ -9105,7 +9105,7 @@ F: arch/arm/mach-davinci/ F: drivers/i2c/busses/i2c-davinci.c TI DAVINCI SERIES MEDIA DRIVER -M: Lad, Prabhakar +M: "Lad, Prabhakar" L: linux-media@vger.kernel.org W: http://linuxtv.org/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ @@ -9115,7 +9115,7 @@ F: drivers/media/platform/davinci/ F: include/media/davinci/ TI AM437X VPFE DRIVER -M: Lad, Prabhakar +M: "Lad, Prabhakar" L: linux-media@vger.kernel.org W: http://linuxtv.org/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ @@ -9124,7 +9124,7 @@ S: Maintained F: drivers/media/platform/am437x/ OV2659 OMNIVISION SENSOR DRIVER -M: Lad, Prabhakar +M: "Lad, Prabhakar" L: linux-media@vger.kernel.org W: http://linuxtv.org/ Q: http://patchwork.linuxtv.org/project/linux-media/list/ -- cgit v1.1 From 4f973c63d158b9293f7dc16f54e119d75bc20380 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 25 Jun 2015 15:02:05 -0700 Subject: MAINTAINERS: Davidlohr has moved Reported-by: Rob Landley Cc: Borislav Petkov Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d599077..bd96a01 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4537,7 +4537,7 @@ S: Maintained F: drivers/media/usb/gspca/ GUID PARTITION TABLE (GPT) -M: Davidlohr Bueso +M: Davidlohr Bueso L: linux-efi@vger.kernel.org S: Maintained F: block/partitions/efi.* -- cgit v1.1 From 2528a8b8f457d7432552d0e2b6f0f4046bb702f4 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 25 Jun 2015 15:02:08 -0700 Subject: __bitmap_parselist: fix bug in empty string handling bitmap_parselist("", &mask, nmaskbits) will erroneously set bit zero in the mask. The same bug is visible in cpumask_parselist() since it is layered on top of the bitmask code, e.g. if you boot with "isolcpus=", you will actually end up with cpu zero isolated. The bug was introduced in commit 4b060420a596 ("bitmap, irq: add smp_affinity_list interface to /proc/irq") when bitmap_parselist() was generalized to support userspace as well as kernelspace. Fixes: 4b060420a596 ("bitmap, irq: add smp_affinity_list interface to /proc/irq") Signed-off-by: Chris Metcalf Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 64c0926..40162f8 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -506,12 +506,12 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, unsigned a, b; int c, old_c, totaldigits; const char __user __force *ubuf = (const char __user __force *)buf; - int exp_digit, in_range; + int at_start, in_range; totaldigits = c = 0; bitmap_zero(maskp, nmaskbits); do { - exp_digit = 1; + at_start = 1; in_range = 0; a = b = 0; @@ -540,11 +540,10 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, break; if (c == '-') { - if (exp_digit || in_range) + if (at_start || in_range) return -EINVAL; b = 0; in_range = 1; - exp_digit = 1; continue; } @@ -554,16 +553,18 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, b = b * 10 + (c - '0'); if (!in_range) a = b; - exp_digit = 0; + at_start = 0; totaldigits++; } if (!(a <= b)) return -EINVAL; if (b >= nmaskbits) return -ERANGE; - while (a <= b) { - set_bit(a, maskp); - a++; + if (!at_start) { + while (a <= b) { + set_bit(a, maskp); + a++; + } } } while (buflen && c == ','); return 0; -- cgit v1.1 From 79e23d577b938d8d79282fd979f4ff3112e1f3f9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 Jun 2015 15:02:11 -0700 Subject: hexdump: Make test data really const The test data arrays, containing pointers to test strings, are never modified, so they can be const, too. Hence mark them "const" and "__initconst". This moves 28 pointers from ".init.data" to ".init.rodata". Signed-off-by: Geert Uytterhoeven Cc: Andy Shevchenko Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test-hexdump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c index c227cc4..5241df3 100644 --- a/lib/test-hexdump.c +++ b/lib/test-hexdump.c @@ -25,19 +25,19 @@ static const char * const test_data_1_le[] __initconst = { "4c", "d1", "19", "99", "43", "b1", "af", "0c", }; -static const char *test_data_2_le[] __initdata = { +static const char * const test_data_2_le[] __initconst = { "32be", "7bdb", "180a", "b293", "ba70", "24c4", "837d", "9b34", "9ca6", "ad31", "0f9c", "e9ac", "d14c", "9919", "b143", "0caf", }; -static const char *test_data_4_le[] __initdata = { +static const char * const test_data_4_le[] __initconst = { "7bdb32be", "b293180a", "24c4ba70", "9b34837d", "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", }; -static const char *test_data_8_le[] __initdata = { +static const char * const test_data_8_le[] __initconst = { "b293180a7bdb32be", "9b34837d24c4ba70", "e9ac0f9cad319ca6", "0cafb1439919d14c", }; -- cgit v1.1 From ca96ab859ab4b5dad1709a6a22613920d19bfbbb Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 25 Jun 2015 15:02:14 -0700 Subject: lib/sort: Add 64 bit swap function In case the call side is not providing a swap function, we either use a 32 bit or a generic swap function. When swapping around pointers on 64 bit architectures falling back to use the generic swap function seems like an unnecessary waste. There at least 9 users ('sort' is of difficult to grep for) of sort() and all of them use the sort function without a customized swap function. Furthermore, they are all using pointers to swap around: arch/x86/kernel/e820.c:sanitize_e820_map() arch/x86/mm/extable.c:sort_extable() drivers/acpi/fan.c:acpi_fan_get_fps() fs/btrfs/super.c:btrfs_descending_sort_devices() fs/xfs/libxfs/xfs_dir2_block.c:xfs_dir2_sf_to_block() kernel/range.c:clean_sort_range() mm/memcontrol.c:__mem_cgroup_usage_register_event() sound/pci/hda/hda_auto_parser.c:snd_hda_parse_pin_defcfg() sound/pci/hda/hda_auto_parser.c:sort_pins_by_sequence() Obviously, we could improve the swap for other sizes as well but this is overkill at this point. A simple test shows sorting a 400 element array (try to stay in one page) with either with u32_swap() or u64_swap() show that the theory actually works. This test was done on a x86_64 (Intel Xeon E5-4610) machine. - swap_32: NumSamples = 100; Min = 48.00; Max = 49.00 Mean = 48.320000; Variance = 0.217600; SD = 0.466476; Median 48.000000 each * represents a count of 1 48.0000 - 48.1000 [ 68]: ******************************************************************** 48.1000 - 48.2000 [ 0]: 48.2000 - 48.3000 [ 0]: 48.3000 - 48.4000 [ 0]: 48.4000 - 48.5000 [ 0]: 48.5000 - 48.6000 [ 0]: 48.6000 - 48.7000 [ 0]: 48.7000 - 48.8000 [ 0]: 48.8000 - 48.9000 [ 0]: 48.9000 - 49.0000 [ 32]: ******************************** - swap_64: NumSamples = 100; Min = 44.00; Max = 63.00 Mean = 48.250000; Variance = 18.687500; SD = 4.322904; Median 47.000000 each * represents a count of 1 44.0000 - 45.9000 [ 15]: *************** 45.9000 - 47.8000 [ 37]: ************************************* 47.8000 - 49.7000 [ 39]: *************************************** 49.7000 - 51.6000 [ 0]: 51.6000 - 53.5000 [ 0]: 53.5000 - 55.4000 [ 0]: 55.4000 - 57.3000 [ 0]: 57.3000 - 59.2000 [ 1]: * 59.2000 - 61.1000 [ 3]: *** 61.1000 - 63.0000 [ 5]: ***** - swap_72: NumSamples = 100; Min = 53.00; Max = 71.00 Mean = 55.070000; Variance = 21.565100; SD = 4.643824; Median 53.000000 each * represents a count of 1 53.0000 - 54.8000 [ 73]: ************************************************************************* 54.8000 - 56.6000 [ 9]: ********* 56.6000 - 58.4000 [ 9]: ********* 58.4000 - 60.2000 [ 0]: 60.2000 - 62.0000 [ 0]: 62.0000 - 63.8000 [ 0]: 63.8000 - 65.6000 [ 0]: 65.6000 - 67.4000 [ 1]: * 67.4000 - 69.2000 [ 4]: **** 69.2000 - 71.0000 [ 4]: **** - test program: static int cmp_32(const void *a, const void *b) { u32 l = *(u32 *)a; u32 r = *(u32 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_64(const void *a, const void *b) { u64 l = *(u64 *)a; u64 r = *(u64 *)b; if (l < r) return -1; if (l > r) return 1; return 0; } static int cmp_72(const void *a, const void *b) { u32 l = get_unaligned((u32 *) a); u32 r = get_unaligned((u32 *) b); if (l < r) return -1; if (l > r) return 1; return 0; } static void init_array32(void *array) { u32 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array64(void *array) { u64 *a = array; int i; a[0] = 3821; for (i = 1; i < ARRAY_ELEMENTS; i++) a[i] = next_pseudo_random32(a[i-1]); } static void init_array72(void *array) { char *p; u32 v; int i; v = 3821; for (i = 0; i < ARRAY_ELEMENTS; i++) { p = (char *)array + (i * 9); put_unaligned(v, (u32*) p); v = next_pseudo_random32(v); } } static void sort_test(void (*init)(void *array), int (*cmp) (const void *, const void *), void *array, size_t size) { ktime_t start, stop; int i; for (i = 0; i < 10000; i++) { init(array); local_irq_disable(); start = ktime_get(); sort(array, ARRAY_ELEMENTS, size, cmp, NULL); stop = ktime_get(); local_irq_enable(); if (i > 10000 - 101) pr_info("%lld\n", ktime_to_us(ktime_sub(stop, start))); } } static void *create_array(size_t size) { void *array; array = kmalloc(ARRAY_ELEMENTS * size, GFP_KERNEL); if (!array) return NULL; return array; } static int perform_test(size_t size) { void *array; array = create_array(size); if (!array) return -ENOMEM; pr_info("test element size %d bytes\n", (int)size); switch (size) { case 4: sort_test(init_array32, cmp_32, array, size); break; case 8: sort_test(init_array64, cmp_64, array, size); break; case 9: sort_test(init_array72, cmp_72, array, size); break; } kfree(array); return 0; } static int __init sort_tests_init(void) { int err; err = perform_test(sizeof(u32)); if (err) return err; err = perform_test(sizeof(u64)); if (err) return err; err = perform_test(sizeof(u64)+1); if (err) return err; return 0; } static void __exit sort_tests_exit(void) { } module_init(sort_tests_init); module_exit(sort_tests_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Daniel Wagner"); MODULE_DESCRIPTION("sort perfomance tests"); Signed-off-by: Daniel Wagner Cc: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/sort.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/sort.c b/lib/sort.c index 43c9fe7..fc20df4 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -8,6 +8,12 @@ #include #include +static int alignment_ok(const void *base, int align) +{ + return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + ((unsigned long)base & (align - 1)) == 0; +} + static void u32_swap(void *a, void *b, int size) { u32 t = *(u32 *)a; @@ -15,6 +21,13 @@ static void u32_swap(void *a, void *b, int size) *(u32 *)b = t; } +static void u64_swap(void *a, void *b, int size) +{ + u64 t = *(u64 *)a; + *(u64 *)a = *(u64 *)b; + *(u64 *)b = t; +} + static void generic_swap(void *a, void *b, int size) { char t; @@ -50,8 +63,14 @@ void sort(void *base, size_t num, size_t size, /* pre-scale counters for performance */ int i = (num/2 - 1) * size, n = num * size, c, r; - if (!swap_func) - swap_func = (size == 4 ? u32_swap : generic_swap); + if (!swap_func) { + if (size == 4 && alignment_ok(base, 4)) + swap_func = u32_swap; + else if (size == 8 && alignment_ok(base, 8)) + swap_func = u64_swap; + else + swap_func = generic_swap; + } /* heapify */ for ( ; i >= 0; i -= size) { -- cgit v1.1 From 9cf79d115f0d161b63161650f14ff903e4c57937 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 25 Jun 2015 15:02:17 -0700 Subject: bitmap: remove explicit newline handling using scnprintf format string bitmap_print_to_pagebuf uses scnprintf to copy the cpumask/list to page buffer. It handles the newline and trailing null character explicitly. It's unnecessary and also partially duplicated as scnprintf already adds trailing null character. The newline can be passed through format string to scnprintf. This patch does that simplification. However theoretically there's one behavior difference: when the buffer is too small, the original code would still output '\n' at the end while the new code(with this patch) would just continue to print the formatted string. Since this function is dealing with only page buffers, it's highly unlikely to hit that corner case. This patch will help in auditing the users of bitmap_print_to_pagebuf to verify that the buffer passed is large enough and get rid of it completely by replacing them with direct scnprintf() [akpm@linux-foundation.org: tweak comment] Signed-off-by: Sudeep Holla Suggested-by: Pawel Moll Cc: Tejun Heo Cc: "Peter Zijlstra (Intel)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 40162f8..a578a01 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -462,19 +462,20 @@ EXPORT_SYMBOL(bitmap_parse_user); * Output format is a comma-separated list of decimal numbers and * ranges if list is specified or hex digits grouped into comma-separated * sets of 8 digits/set. Returns the number of characters written to buf. + * + * It is assumed that @buf is a pointer into a PAGE_SIZE area and that + * sufficient storage remains at @buf to accommodate the + * bitmap_print_to_pagebuf() output. */ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits) { - ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf - 2; + ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; int n = 0; - if (len > 1) { - n = list ? scnprintf(buf, len, "%*pbl", nmaskbits, maskp) : - scnprintf(buf, len, "%*pb", nmaskbits, maskp); - buf[n++] = '\n'; - buf[n] = '\0'; - } + if (len > 1) + n = list ? scnprintf(buf, len, "%*pbl\n", nmaskbits, maskp) : + scnprintf(buf, len, "%*pb\n", nmaskbits, maskp); return n; } EXPORT_SYMBOL(bitmap_print_to_pagebuf); -- cgit v1.1 From 9d2a8da006fcbf2dea663c095f0a0088dfbbec15 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 25 Jun 2015 15:02:19 -0700 Subject: radix-tree: replace preallocated node array with linked list Currently we use per-cpu array to hold pointers to preallocated nodes. Let's replace it with linked list. On x86_64 it saves 256 bytes in per-cpu ELF section which may translate into freeing up 2MB of memory for NR_CPUS==8192. [akpm@linux-foundation.org: fix comment, coding style] Signed-off-by: Kirill A. Shutemov Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 061550d..f9ebe1c 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -65,7 +65,8 @@ static struct kmem_cache *radix_tree_node_cachep; */ struct radix_tree_preload { int nr; - struct radix_tree_node *nodes[RADIX_TREE_PRELOAD_SIZE]; + /* nodes->private_data points to next preallocated node */ + struct radix_tree_node *nodes; }; static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; @@ -197,8 +198,9 @@ radix_tree_node_alloc(struct radix_tree_root *root) */ rtp = this_cpu_ptr(&radix_tree_preloads); if (rtp->nr) { - ret = rtp->nodes[rtp->nr - 1]; - rtp->nodes[rtp->nr - 1] = NULL; + ret = rtp->nodes; + rtp->nodes = ret->private_data; + ret->private_data = NULL; rtp->nr--; } /* @@ -257,17 +259,20 @@ static int __radix_tree_preload(gfp_t gfp_mask) preempt_disable(); rtp = this_cpu_ptr(&radix_tree_preloads); - while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { + while (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { preempt_enable(); node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); if (node == NULL) goto out; preempt_disable(); rtp = this_cpu_ptr(&radix_tree_preloads); - if (rtp->nr < ARRAY_SIZE(rtp->nodes)) - rtp->nodes[rtp->nr++] = node; - else + if (rtp->nr < RADIX_TREE_PRELOAD_SIZE) { + node->private_data = rtp->nodes; + rtp->nodes = node; + rtp->nr++; + } else { kmem_cache_free(radix_tree_node_cachep, node); + } } ret = 0; out: @@ -1463,15 +1468,16 @@ static int radix_tree_callback(struct notifier_block *nfb, { int cpu = (long)hcpu; struct radix_tree_preload *rtp; + struct radix_tree_node *node; /* Free per-cpu pool of perloaded nodes */ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { rtp = &per_cpu(radix_tree_preloads, cpu); while (rtp->nr) { - kmem_cache_free(radix_tree_node_cachep, - rtp->nodes[rtp->nr-1]); - rtp->nodes[rtp->nr-1] = NULL; - rtp->nr--; + node = rtp->nodes; + rtp->nodes = node->private_data; + kmem_cache_free(radix_tree_node_cachep, node); + rtp->nr--; } } return NOTIFY_OK; -- cgit v1.1 From 94df290404cd0da8016698bf3f398410f29d9a64 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:22 -0700 Subject: lib/string.c: introduce strreplace() Strings are sometimes sanitized by replacing a certain character (often '/') by another (often '!'). In a few places, this is done the same way Schlemiel the Painter would do it. Others are slightly smarter but still do multiple strchr() calls. Introduce strreplace() to do this using a single function call and a single pass over the string. One would expect the return value to be one of three things: void, s, or the number of replacements made. I chose the fourth, returning a pointer to the end of the string. This is more likely to be useful (for example allowing the caller to avoid a strlen call). Signed-off-by: Rasmus Villemoes Cc: "Theodore Ts'o" Cc: Greg Kroah-Hartman Cc: Neil Brown Cc: Steven Rostedt Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 1 + lib/string.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index e40099e..a8d90db 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -111,6 +111,7 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif void *memchr_inv(const void *s, int c, size_t n); +char *strreplace(char *s, char old, char new); extern void kfree_const(const void *x); diff --git a/lib/string.c b/lib/string.c index bb3d4b6..13d1e84 100644 --- a/lib/string.c +++ b/lib/string.c @@ -849,3 +849,20 @@ void *memchr_inv(const void *start, int c, size_t bytes) return check_bytes8(start, value, bytes % 8); } EXPORT_SYMBOL(memchr_inv); + +/** + * strreplace - Replace all occurrences of character in string. + * @s: The string to operate on. + * @old: The character being replaced. + * @new: The character @old is replaced with. + * + * Returns pointer to the nul byte at the end of @s. + */ +char *strreplace(char *s, char old, char new) +{ + for (; *s; ++s) + if (*s == old) + *s = new; + return s; +} +EXPORT_SYMBOL(strreplace); -- cgit v1.1 From 1bb564718f298dfd7c435819d3bc902e6be666c6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:25 -0700 Subject: kernel/trace/trace_events_filter.c: use strreplace() There's no point in starting over every time we see a ','... Signed-off-by: Rasmus Villemoes Acked-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/trace/trace_events_filter.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 7f2e97c..9d4a78f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -2082,7 +2082,7 @@ struct function_filter_data { static char ** ftrace_function_filter_re(char *buf, int len, int *count) { - char *str, *sep, **re; + char *str, **re; str = kstrndup(buf, len, GFP_KERNEL); if (!str) @@ -2092,8 +2092,7 @@ ftrace_function_filter_re(char *buf, int len, int *count) * The argv_split function takes white space * as a separator, so convert ',' into spaces. */ - while ((sep = strchr(str, ','))) - *sep = ' '; + strreplace(str, ',', ' '); re = argv_split(GFP_KERNEL, str, count); kfree(str); -- cgit v1.1 From ff14417c0a00c9a906b4ba79fbecb79bd2435207 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:28 -0700 Subject: kernel/trace/blktrace.c: use strreplace() in do_blk_trace_setup() Part of the disassembly of do_blk_trace_setup: 231b: e8 00 00 00 00 callq 2320 231c: R_X86_64_PC32 strlen+0xfffffffffffffffc 2320: eb 0a jmp 232c 2322: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) 2328: 48 83 c3 01 add $0x1,%rbx 232c: 48 39 d8 cmp %rbx,%rax 232f: 76 47 jbe 2378 2331: 41 80 3c 1c 2f cmpb $0x2f,(%r12,%rbx,1) 2336: 75 f0 jne 2328 2338: 41 c6 04 1c 5f movb $0x5f,(%r12,%rbx,1) 233d: 4c 89 e7 mov %r12,%rdi 2340: e8 00 00 00 00 callq 2345 2341: R_X86_64_PC32 strlen+0xfffffffffffffffc 2345: eb e1 jmp 2328 Yep, that's right: gcc isn't smart enough to realize that replacing '/' by '_' cannot change the strlen(), so we call it again and again (at least when a '/' is found). Even if gcc were that smart, this construction would still loop over the string twice, once for the initial strlen() call and then the open-coded loop. Let's simply use strreplace() instead. Signed-off-by: Rasmus Villemoes Acked-by: Steven Rostedt Liked-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/trace/blktrace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 483cecf..4eeae46 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -439,7 +439,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, { struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; - int ret, i; + int ret; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -451,9 +451,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * some device names have larger paths - convert the slashes * to underscores for this to work as expected */ - for (i = 0; i < strlen(buts->name); i++) - if (buts->name[i] == '/') - buts->name[i] = '_'; + strreplace(buts->name, '/', '_'); bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) -- cgit v1.1 From 2abf114fc841550a3e0c63afcd31c9781116fe42 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:30 -0700 Subject: lib/kobject.c: use strreplace() There's probably not many slashes in the name, but starting over when we see one feels wrong. Signed-off-by: Rasmus Villemoes Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kobject.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/lib/kobject.c b/lib/kobject.c index 3b841b9..75ee638 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -257,23 +257,20 @@ static int kobject_add_internal(struct kobject *kobj) int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs) { - const char *old_name = kobj->name; char *s; if (kobj->name && !fmt) return 0; - kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); - if (!kobj->name) { - kobj->name = old_name; + s = kvasprintf(GFP_KERNEL, fmt, vargs); + if (!s) return -ENOMEM; - } /* ewww... some of these buggers have '/' in the name ... */ - while ((s = strchr(kobj->name, '/'))) - s[0] = '!'; + strreplace(s, '/', '!'); + kfree(kobj->name); + kobj->name = s; - kfree(old_name); return 0; } -- cgit v1.1 From a29fd614a6af57610b1963e3445f1758c3806187 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:33 -0700 Subject: drivers/base/core.c: use strreplace() This eliminates a little .text and avoids repeating the strchr call when we meet a '!' (which will happen at least once). Signed-off-by: Rasmus Villemoes Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 21d1303..dafae6d 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1303,12 +1303,11 @@ const char *device_get_devnode(struct device *dev, return dev_name(dev); /* replace '!' in the name with '/' */ - *tmp = kstrdup(dev_name(dev), GFP_KERNEL); - if (!*tmp) + s = kstrdup(dev_name(dev), GFP_KERNEL); + if (!s) return NULL; - while ((s = strchr(*tmp, '!'))) - s[0] = '/'; - return *tmp; + strreplace(s, '!', '/'); + return *tmp = s; } /** -- cgit v1.1 From 90a9befb20bd455b167b02d4018b5e882da76505 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:36 -0700 Subject: drivers/md/md.c: use strreplace() There's no point in starting over when we meet a '/'. This also eliminates a stack variable and a little .text. Signed-off-by: Rasmus Villemoes Acked-by: NeilBrown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4dbed4a..8d9f89b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2024,7 +2024,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) { char b[BDEVNAME_SIZE]; struct kobject *ko; - char *s; int err; /* prevent duplicates */ @@ -2070,8 +2069,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) return -EBUSY; } bdevname(rdev->bdev,b); - while ( (s=strchr(b, '/')) != NULL) - *s = '!'; + strreplace(b, '/', '!'); rdev->mddev = mddev; printk(KERN_INFO "md: bind<%s>\n", b); -- cgit v1.1 From 81ae394bdc473cafa5074948516fc83b504ea60b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:38 -0700 Subject: fs/jbd2/journal.c: use strreplace() In one case, we eliminate a local variable; in the other a strlen() call and some .text. Signed-off-by: Rasmus Villemoes Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/journal.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b96bd80..5c187de 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1137,7 +1137,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, { journal_t *journal = journal_init_common(); struct buffer_head *bh; - char *p; int n; if (!journal) @@ -1150,9 +1149,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_blk_offset = start; journal->j_maxlen = len; bdevname(journal->j_dev, journal->j_devname); - p = journal->j_devname; - while ((p = strchr(p, '/'))) - *p = '!'; + strreplace(journal->j_devname, '/', '!'); jbd2_stats_proc_init(journal); n = journal->j_blocksize / sizeof(journal_block_tag_t); journal->j_wbufsize = n; @@ -1204,10 +1201,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; journal->j_inode = inode; bdevname(journal->j_dev, journal->j_devname); - p = journal->j_devname; - while ((p = strchr(p, '/'))) - *p = '!'; - p = journal->j_devname + strlen(journal->j_devname); + p = strreplace(journal->j_devname, '/', '!'); sprintf(p, "-%lu", journal->j_inode->i_ino); jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", -- cgit v1.1 From ec3904dc6537fe606b281b368639236296ee7257 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 25 Jun 2015 15:02:41 -0700 Subject: fs/ext4/super.c: use strreplace() in ext4_fill_super() This makes a very large function a little smaller. Signed-off-by: Rasmus Villemoes Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ca9d4a2..5f3c43a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3420,7 +3420,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; - char *cp; const char *descr; int ret = -ENOMEM; int blocksize, clustersize; @@ -3456,8 +3455,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #endif /* Cleanup superblock name */ - for (cp = sb->s_id; (cp = strchr(cp, '/'));) - *cp = '!'; + strreplace(sb->s_id, '/', '!'); /* -EINVAL is default */ ret = -EINVAL; -- cgit v1.1 From cbdc281019b8c5c905ef597511471b18e0fd4a15 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 25 Jun 2015 15:02:43 -0700 Subject: drivers/firmware/memmap.c: fix kernel-doc format Fix kernel-doc format validation to be able to use kernel-doc script for checking it. Signed-off-by: Michal Simek Reviewed-by: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/memmap.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index cc016c6..5de3ed2 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -144,7 +144,9 @@ static struct kobj_type __refdata memmap_ktype = { * * Common implementation of firmware_map_add() and firmware_map_add_early() * which expects a pre-allocated struct firmware_map_entry. - **/ + * + * Return: 0 always + */ static int firmware_map_add_entry(u64 start, u64 end, const char *type, struct firmware_map_entry *entry) @@ -170,7 +172,7 @@ static int firmware_map_add_entry(u64 start, u64 end, * @entry: removed entry. * * The caller must hold map_entries_lock, and release it properly. - **/ + */ static inline void firmware_map_remove_entry(struct firmware_map_entry *entry) { list_del(&entry->list); @@ -208,7 +210,7 @@ static inline void remove_sysfs_fw_map_entry(struct firmware_map_entry *entry) kobject_put(&entry->kobj); } -/* +/** * firmware_map_find_entry_in_list() - Search memmap entry in a given list. * @start: Start of the memory range. * @end: End of the memory range (exclusive). @@ -236,7 +238,7 @@ firmware_map_find_entry_in_list(u64 start, u64 end, const char *type, return NULL; } -/* +/** * firmware_map_find_entry() - Search memmap entry in map_entries. * @start: Start of the memory range. * @end: End of the memory range (exclusive). @@ -254,7 +256,7 @@ firmware_map_find_entry(u64 start, u64 end, const char *type) return firmware_map_find_entry_in_list(start, end, type, &map_entries); } -/* +/** * firmware_map_find_entry_bootmem() - Search memmap entry in map_entries_bootmem. * @start: Start of the memory range. * @end: End of the memory range (exclusive). @@ -283,8 +285,8 @@ firmware_map_find_entry_bootmem(u64 start, u64 end, const char *type) * similar to function firmware_map_add_early(). The only difference is that * it will create the syfs entry dynamically. * - * Returns 0 on success, or -ENOMEM if no memory could be allocated. - **/ + * Return: 0 on success, or -ENOMEM if no memory could be allocated. + */ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; @@ -325,8 +327,8 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type) * * That function must be called before late_initcall. * - * Returns 0 on success, or -ENOMEM if no memory could be allocated. - **/ + * Return: 0 on success, or -ENOMEM if no memory could be allocated. + */ int __init firmware_map_add_early(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; @@ -346,8 +348,8 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type) * * removes a firmware mapping entry. * - * Returns 0 on success, or -EINVAL if no entry. - **/ + * Return: 0 on success, or -EINVAL if no entry. + */ int __meminit firmware_map_remove(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; -- cgit v1.1 From cb426e99ff9225e94fb56bd4c5cfcce8b78a3904 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:02:46 -0700 Subject: checkpatch: check for uncommented waitqueue_active() Linus sayeth: : Pretty much every single time people use this "if : (waitqueue_active())" model, it tends to be a bug, because it means : that there is zero serialization with people who are just about to go : to sleep. It's fundamentally racy against all the "wait_event()" loops : that carefully do memory barriers between testing conditions and going : to sleep, because the memory barriers now don't exist on the waking : side. : : So I'm making a new rule: if you use waitqueue_active(), I want an : explanation for why it's not racy with the waiter. A big comment about : the memory ordering, or about higher-level locks that are held by the : caller, or something. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c5ec977..3f2ff26 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4898,6 +4898,13 @@ sub process { "memory barrier without comment\n" . $herecurr); } } +# check for waitqueue_active without a comment. + if ($line =~ /\bwaitqueue_active\s*\(/) { + if (!ctx_has_comment($first_line, $linenr)) { + WARN("WAITQUEUE_ACTIVE", + "waitqueue_active without comment\n" . $herecurr); + } + } # check of hardware specific defines if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) { CHK("ARCH_DEFINES", -- cgit v1.1 From e6176fa4728fb6df4f66c3e9c08736c369e71f75 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:02:49 -0700 Subject: checkpatch: add --strict warning for c99 fixed size typedefs : int_t Using declarations like u_int16_t in kernel code is not preferred. Suggest the kernel sized types instead of the c99 types when not in the uapi directory. Add a $typeC99Typedefs variable for the types to check and neaten the other typedef variables. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3f2ff26..f7f222a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -347,15 +347,20 @@ our $UTF8 = qr{ | $NON_ASCII_UTF8 }x; +our $typeC99Typedefs = qr{(?:__)?(?:[us]_?)?int_?(?:8|16|32|64)_t}; our $typeOtherOSTypedefs = qr{(?x: u_(?:char|short|int|long) | # bsd u(?:nchar|short|int|long) # sysv )}; - -our $typeTypedefs = qr{(?x: +our $typeKernelTypedefs = qr{(?x: (?:__)?(?:u|s|be|le)(?:8|16|32|64)| atomic_t )}; +our $typeTypedefs = qr{(?x: + $typeC99Typedefs\b| + $typeOtherOSTypedefs\b| + $typeKernelTypedefs\b +)}; our $logFunctions = qr{(?x: printk(?:_ratelimited|_once|)| @@ -516,7 +521,6 @@ sub build_types { my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)"; $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; $BasicType = qr{ - (?:$typeOtherOSTypedefs\b)| (?:$typeTypedefs\b)| (?:${all}\b) }x; @@ -524,7 +528,6 @@ sub build_types { (?:$Modifier\s+|const\s+)* (?: (?:typeof|__typeof__)\s*\([^\)]*\)| - (?:$typeOtherOSTypedefs\b)| (?:$typeTypedefs\b)| (?:${all}\b) ) @@ -542,7 +545,6 @@ sub build_types { (?: (?:typeof|__typeof__)\s*\([^\)]*\)| (?:$typeTypedefs\b)| - (?:$typeOtherOSTypedefs\b)| (?:${allWithAttr}\b) ) (?:\s+$Modifier|\s+const)* @@ -3264,7 +3266,6 @@ sub process { $line !~ /\btypedef\s+$Type\s*\(\s*\*?$Ident\s*\)\s*\(/ && $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ && $line !~ /\b$typeTypedefs\b/ && - $line !~ /\b$typeOtherOSTypedefs\b/ && $line !~ /\b__bitwise(?:__|)\b/) { WARN("NEW_TYPEDEFS", "do not add new typedefs\n" . $herecurr); @@ -4980,6 +4981,24 @@ sub process { "Using weak declarations can have unintended link defects\n" . $herecurr); } +# check for c99 types like uint8_t used outside of uapi/ + if ($realfile !~ m@\binclude/uapi/@ && + $line =~ /\b($Declare)\s*$Ident\s*[=;,\[]/) { + my $type = $1; + if ($type =~ /\b($typeC99Typedefs)\b/) { + $type = $1; + my $kernel_type = 'u'; + $kernel_type = 's' if ($type =~ /^_*[si]/); + $type =~ /(\d+)/; + $kernel_type .= $1; + if (CHK("PREFER_KERNEL_TYPES", + "Prefer kernel type '$kernel_type' over '$type'\n" . $herecurr) && + $fix) { + $fixed[$fixlinenr] =~ s/\b$type\b/$kernel_type/; + } + } + } + # check for sizeof(&) if ($line =~ /\bsizeof\s*\(\s*\&/) { WARN("SIZEOF_ADDRESS", -- cgit v1.1 From 485ff23ed2575e9d479f45227749cbbcd5df3e19 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Thu, 25 Jun 2015 15:02:52 -0700 Subject: checkpatch: make types found in a source file/patch local checkpatch uses various cues in its input patches and files to discover the names of user-defined types and modifiers. It then uses that information when processing expressions to discover potential style issues. Unfortunately, in rare cases, this means that checkpatch may give different results if you run it on several input files in one execution, or one by one! The reason is that it may identify a type (or something that looks like a type) in one file, and then carry this information over when processing a different file. For example, drivers/staging/media/bcm2048/radio-bcm2048.c contains this line (in a macro): size value; and drivers/staging/media/davinci_vpfe/vpfe_video.c has this line: while (size * *nbuffers > vpfe_dev->video_limit) If checkpatch processes these 2 files in a single command like: ./scripts/checkpatch.pl -f $file1 $file2 the (spurious) "size" type detected in the first file will cause it to flag the second file for improper use of the pointer dereference operator. To fix this, store types and modifiers found in a file in separate arrays from built-in ones, and reset the arrays of types and modifiers found in files at the beginning of each new source file. Signed-off-by: Alex Dowad Signed-off-by: Joe Perches Acked-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index f7f222a..e46414d 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -423,6 +423,7 @@ our @typeList = ( qr{${Ident}_handler_fn}, @typeListMisordered, ); +our @typeListFile = (); our @typeListWithAttr = ( @typeList, qr{struct\s+$InitAttribute\s+$Ident}, @@ -432,6 +433,7 @@ our @typeListWithAttr = ( our @modifierList = ( qr{fastcall}, ); +our @modifierListFile = (); our @mode_permission_funcs = ( ["module_param", 3], @@ -515,8 +517,8 @@ if ($codespell) { $misspellings = join("|", sort keys %spelling_fix) if keys %spelling_fix; sub build_types { - my $mods = "(?x: \n" . join("|\n ", @modifierList) . "\n)"; - my $all = "(?x: \n" . join("|\n ", @typeList) . "\n)"; + my $mods = "(?x: \n" . join("|\n ", (@modifierList, @modifierListFile)) . "\n)"; + my $all = "(?x: \n" . join("|\n ", (@typeList, @typeListFile)) . "\n)"; my $Misordered = "(?x: \n" . join("|\n ", @typeListMisordered) . "\n)"; my $allWithAttr = "(?x: \n" . join("|\n ", @typeListWithAttr) . "\n)"; $Modifier = qr{(?:$Attribute|$Sparse|$mods)}; @@ -748,6 +750,9 @@ for my $filename (@ARGV) { @fixed_inserted = (); @fixed_deleted = (); $fixlinenr = -1; + @modifierListFile = (); + @typeListFile = (); + build_types(); } exit($exit); @@ -1612,13 +1617,13 @@ sub possible { for my $modifier (split(' ', $possible)) { if ($modifier !~ $notPermitted) { warn "MODIFIER: $modifier ($possible) ($line)\n" if ($dbg_possible); - push(@modifierList, $modifier); + push(@modifierListFile, $modifier); } } } else { warn "POSSIBLE: $possible ($line)\n" if ($dbg_possible); - push(@typeList, $possible); + push(@typeListFile, $possible); } build_types(); } else { -- cgit v1.1 From 33acb54a4379ccd3494580bdc30af1aa13ee4aab Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:02:54 -0700 Subject: checkpatch: use $String consistently String detection where a source line with a string constant is converted can either have an X or a space. Some of the string detection regexes do not allow the space character, but there is a handy $String variable that does. Convert the remaining uses of string detection regexes to use the $String variable to reduce possible false negatives. Signed-off-by: Joe Perches Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e46414d..6266970 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1008,7 +1008,7 @@ sub sanitise_line { sub get_quoted_string { my ($line, $rawline) = @_; - return "" if ($line !~ m/(\"[X\t]+\")/g); + return "" if ($line !~ m/($String)/g); return substr($rawline, $-[0], $+[0] - $-[0]); } @@ -4343,8 +4343,8 @@ sub process { } # Flatten any obvious string concatentation. - while ($dstat =~ s/("X*")\s*$Ident/$1/ || - $dstat =~ s/$Ident\s*("X*")/$1/) + while ($dstat =~ s/($String)\s*$Ident/$1/ || + $dstat =~ s/$Ident\s*($String)/$1/) { } @@ -4625,7 +4625,7 @@ sub process { # to grep for the string. Make exceptions when the previous string ends in a # newline (multiple lines in one string constant) or '\t', '\r', ';', or '{' # (common in inline assembly) or is a octal \123 or hexadecimal \xaf value - if ($line =~ /^\+\s*"[X\t]*"/ && + if ($line =~ /^\+\s*$String/ && $prevline =~ /"\s*$/ && $prevrawline !~ /(?:\\(?:[ntr]|[0-7]{1,3}|x[0-9a-fA-F]{1,2})|;\s*|\{\s*)"\s*$/) { if (WARN("SPLIT_STRING", @@ -4671,13 +4671,13 @@ sub process { } # concatenated string without spaces between elements - if ($line =~ /"X+"[A-Z_]+/ || $line =~ /[A-Z_]+"X+"/) { + if ($line =~ /$String[A-Z_]/ || $line =~ /[A-Za-z0-9_]$String/) { CHK("CONCATENATED_STRING", "Concatenated strings should use spaces between elements\n" . $herecurr); } # uncoalesced string fragments - if ($line =~ /"X*"\s*"/) { + if ($line =~ /$String\s*"/) { WARN("STRING_FRAGMENTS", "Consecutive strings are generally better as a single string\n" . $herecurr); } -- cgit v1.1 From 47e0c88b37a5c3d6732f4ec896dfa8aa55868b4f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:02:57 -0700 Subject: checkpatch: categorize some long line length checks Many lines of code extend beyond the maximum line length. Some of these are possibly justified by use type. For instance: structure definitions where comments are added per member like: struct foo { type member; /* some long description */ And lines that don't fit the typical logging message style where a string constant is used like: SOME_MACRO(args, "Some long string"); Categorize these long line types so that checkpatch can use a command-line --ignore= option to avoid emitting some long line warnings. One of the existing checkpatch exclusions allowed kernel-doc argument documentation to exceed 80 columns because old versions of kernel-doc required single line documentation. The requirement was removed in 2009 so remove that exclusion. Add documentation to make the test a bit clearer. Signed-off-by: Joe Perches Cc: Julia Lawall Cc: Michael Shuey Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 60 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6266970..34f1415 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2517,16 +2517,56 @@ sub process { # check we are in a valid source file if not then ignore this hunk next if ($realfile !~ /\.(h|c|s|S|pl|sh|dtsi|dts)$/); -#line length limit - if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ && - $rawline !~ /^.\s*\*\s*\@$Ident\s/ && - !($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(KERN_\S+\s*|[^"]*))?$String\s*(?:|,|\)\s*;)\s*$/ || - $line =~ /^\+\s*$String\s*(?:\s*|,|\)\s*;)\s*$/ || - $line =~ /^\+\s*#\s*define\s+\w+\s+$String$/) && - $length > $max_line_length) - { - WARN("LONG_LINE", - "line over $max_line_length characters\n" . $herecurr); +# line length limit (with some exclusions) +# +# There are a few types of lines that may extend beyond $max_line_length: +# logging functions like pr_info that end in a string +# lines with a single string +# #defines that are a single string +# +# There are 3 different line length message types: +# LONG_LINE_COMMENT a comment starts before but extends beyond $max_linelength +# LONG_LINE_STRING a string starts before but extends beyond $max_line_length +# LONG_LINE all other lines longer than $max_line_length +# +# if LONG_LINE is ignored, the other 2 types are also ignored +# + + if ($length > $max_line_length) { + my $msg_type = "LONG_LINE"; + + # Check the allowed long line types first + + # logging functions that end in a string that starts + # before $max_line_length + if ($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(?:KERN_\S+\s*|[^"]*))?($String\s*(?:|,|\)\s*;)\s*)$/ && + length(expand_tabs(substr($line, 1, length($line) - length($1) - 1))) <= $max_line_length) { + $msg_type = ""; + + # lines with only strings (w/ possible termination) + # #defines with only strings + } elsif ($line =~ /^\+\s*$String\s*(?:\s*|,|\)\s*;)\s*$/ || + $line =~ /^\+\s*#\s*define\s+\w+\s+$String$/) { + $msg_type = ""; + + # Otherwise set the alternate message types + + # a comment starts before $max_line_length + } elsif ($line =~ /($;[\s$;]*)$/ && + length(expand_tabs(substr($line, 1, length($line) - length($1) - 1))) <= $max_line_length) { + $msg_type = "LONG_LINE_COMMENT" + + # a quoted string starts before $max_line_length + } elsif ($sline =~ /\s*($String(?:\s*(?:\\|,\s*|\)\s*;\s*))?)$/ && + length(expand_tabs(substr($line, 1, length($line) - length($1) - 1))) <= $max_line_length) { + $msg_type = "LONG_LINE_STRING" + } + + if ($msg_type ne "" && + (show_type("LONG_LINE") || show_type($msg_type))) { + WARN($msg_type, + "line over $max_line_length characters\n" . $herecurr); + } } # check for adding lines without a newline. -- cgit v1.1 From d8469f16207c626d71749ada88c13db1238df39e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:03:00 -0700 Subject: checkpatch: improve output with multiple command-line files If there are multiple patches/files on the command line, use a prefix before the patch/file message output like: -------------- patch/filename -------------- to make the identifying which messages go with which file/patch a bit easier to parse. Move the perl version and false positive messages after all the files have been scanned so that they are emitted only once. Standardize the NOTE: <...> form to always emit a blank line before the NOTE and always use print << "EOM" style. Signed-off-by: Joe Perches Suggested-by: Petr Mladek Tested-by: Petr Mladek Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 62 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 34f1415..1b999cc 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -197,11 +197,11 @@ sub hash_show_words { my ($hashRef, $prefix) = @_; if ($quiet == 0 && keys %$hashRef) { - print "NOTE: $prefix message types:"; + print "\nNOTE: $prefix message types:"; foreach my $word (sort keys %$hashRef) { print " $word"; } - print "\n\n"; + print "\n"; } } @@ -741,6 +741,13 @@ for my $filename (@ARGV) { push(@rawlines, $_); } close($FILE); + + if ($#ARGV > 0 && $quiet == 0) { + print '-' x length($vname) . "\n"; + print "$vname\n"; + print '-' x length($vname) . "\n"; + } + if (!process($filename)) { $exit = 1; } @@ -755,6 +762,23 @@ for my $filename (@ARGV) { build_types(); } +if (!$quiet) { + if ($^V lt 5.10.0) { + print << "EOM" + +NOTE: perl $^V is not modern enough to detect all possible issues. + An upgrade to at least perl v5.10.0 is suggested. +EOM + } + if ($exit) { + print << "EOM" + +NOTE: If any of the errors are false positives, please report + them to the maintainer, see CHECKPATCH in MAINTAINERS. +EOM + } +} + exit($exit); sub top_of_kernel_tree { @@ -5578,22 +5602,18 @@ sub process { print "total: $cnt_error errors, $cnt_warn warnings, " . (($check)? "$cnt_chk checks, " : "") . "$cnt_lines lines checked\n"; - print "\n" if ($quiet == 0); } if ($quiet == 0) { - - if ($^V lt 5.10.0) { - print("NOTE: perl $^V is not modern enough to detect all possible issues.\n"); - print("An upgrade to at least perl v5.10.0 is suggested.\n\n"); - } - # If there were whitespace errors which cleanpatch can fix # then suggest that. if ($rpt_cleaners) { - print "NOTE: whitespace errors detected, you may wish to use scripts/cleanpatch or\n"; - print " scripts/cleanfile\n\n"; $rpt_cleaners = 0; + print << "EOM" + +NOTE: Whitespace errors detected. + You may wish to use scripts/cleanpatch or scripts/cleanfile +EOM } } @@ -5627,6 +5647,7 @@ sub process { if (!$quiet) { print << "EOM"; + Wrote EXPERIMENTAL --fix correction(s) to '$newfile' Do _NOT_ trust the results written to this file. @@ -5634,22 +5655,17 @@ Do _NOT_ submit these changes without inspecting them for correctness. This EXPERIMENTAL file is simply a convenience to help rewrite patches. No warranties, expressed or implied... - EOM } } - if ($clean == 1 && $quiet == 0) { - print "$vname has no obvious style problems and is ready for submission.\n" - } - if ($clean == 0 && $quiet == 0) { - print << "EOM"; -$vname has style problems, please review. - -If any of these errors are false positives, please report -them to the maintainer, see CHECKPATCH in MAINTAINERS. -EOM + if ($quiet == 0) { + print "\n"; + if ($clean == 1) { + print "$vname has no obvious style problems and is ready for submission.\n"; + } else { + print "$vname has style problems, please review.\n"; + } } - return $clean; } -- cgit v1.1 From 57230297116faf5b0a995916d5dd5fedab54cba3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:03:03 -0700 Subject: checkpatch: colorize output to terminal Add optional colors to make seeing message types a bit easier. Add --color command line switch, default:on Error is RED, warning is YELLOW, check is GREEN. The message type, if shown, is BLUE. Signed-off-by: Joe Perches Tested-by: Petr Mladek Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1b999cc..d52293f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -9,6 +9,7 @@ use strict; use POSIX; use File::Basename; use Cwd 'abs_path'; +use Term::ANSIColor qw(:constants); my $P = $0; my $D = dirname(abs_path($P)); @@ -49,6 +50,7 @@ my $min_conf_desc_length = 4; my $spelling_file = "$D/spelling.txt"; my $codespell = 0; my $codespellfile = "/usr/local/share/codespell/dictionary.txt"; +my $color = 1; sub help { my ($exitcode) = @_; @@ -93,6 +95,7 @@ Options: --codespell Use the codespell dictionary for spelling/typos (default:/usr/local/share/codespell/dictionary.txt) --codespellfile Use this codespell dictionary + --color Use colors when output is STDOUT (default: on) -h, --help, --version display this help and exit When FILE is - read standard input. @@ -153,6 +156,7 @@ GetOptions( 'test-only=s' => \$tst_only, 'codespell!' => \$codespell, 'codespellfile=s' => \$codespellfile, + 'color!' => \$color, 'h|help' => \$help, 'version' => \$help ) or help(1); @@ -1672,15 +1676,26 @@ sub report { (defined $tst_only && $msg !~ /\Q$tst_only\E/)) { return 0; } - my $line; + my $output = ''; + if (-t STDOUT && $color) { + if ($level eq 'ERROR') { + $output .= RED; + } elsif ($level eq 'WARNING') { + $output .= YELLOW; + } else { + $output .= GREEN; + } + } + $output .= $prefix . $level . ':'; if ($show_types) { - $line = "$prefix$level:$type: $msg\n"; - } else { - $line = "$prefix$level: $msg\n"; + $output .= BLUE if (-t STDOUT && $color); + $output .= "$type:"; } - $line = (split('\n', $line))[0] . "\n" if ($terse); + $output .= RESET if (-t STDOUT && $color); + $output .= ' ' . $msg . "\n"; + $output = (split('\n', $output))[0] . "\n" if ($terse); - push(our @report, $line); + push(our @report, $output); return 1; } -- cgit v1.1 From 34d8815f9512b01757e08f8101730503c87b6353 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:03:05 -0700 Subject: checkpatch: add --showfile to allow input via pipe to show filenames Using "git diff | ./scripts/checkpatch -" does not have an easy mechanism to see the files and lines actually modified. Add --showfile to see the file and line specified in the diff. When --showfile is used without --terse, the second line of each message output is redundant, so it is removed. Signed-off-by: Joe Perches Cc: Petr Mladek Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d52293f..46ebc6a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -25,6 +25,7 @@ my $chk_patch = 1; my $tst_only; my $emacs = 0; my $terse = 0; +my $showfile = 0; my $file = 0; my $check = 0; my $check_orig = 0; @@ -66,6 +67,7 @@ Options: --patch treat FILE as patchfile (default) --emacs emacs compile window format --terse one line per report + --showfile emit diffed file position, not input file position -f, --file treat FILE as regular source file --subjective, --strict enable more subjective tests --types TYPE(,TYPE2...) show only these comma separated message types @@ -137,6 +139,7 @@ GetOptions( 'patch!' => \$chk_patch, 'emacs!' => \$emacs, 'terse!' => \$terse, + 'showfile!' => \$showfile, 'f|file!' => \$file, 'subjective!' => \$check, 'strict!' => \$check, @@ -1693,6 +1696,12 @@ sub report { } $output .= RESET if (-t STDOUT && $color); $output .= ' ' . $msg . "\n"; + + if ($showfile) { + my @lines = split("\n", $output, -1); + splice(@lines, 1, 1); + $output = join("\n", @lines); + } $output = (split('\n', $output))[0] . "\n" if ($terse); push(our @report, $output); @@ -2119,10 +2128,6 @@ sub process { my $hunk_line = ($realcnt != 0); -#make up the handle for any error we report on this line - $prefix = "$filename:$realline: " if ($emacs && $file); - $prefix = "$filename:$linenr: " if ($emacs && !$file); - $here = "#$linenr: " if (!$file); $here = "#$realline: " if ($file); @@ -2152,6 +2157,13 @@ sub process { $found_file = 1; } +#make up the handle for any error we report on this line + if ($showfile) { + $prefix = "$realfile:$realline: " + } elsif ($emacs) { + $prefix = "$filename:$linenr: "; + } + if ($found_file) { if ($realfile =~ m@^(drivers/net/|net/)@) { $check = 1; @@ -5606,7 +5618,7 @@ sub process { ERROR("NOT_UNIFIED_DIFF", "Does not appear to be a unified-diff format patch\n"); } - if ($is_patch && $chk_signoff && $signoff == 0) { + if ($is_patch && $filename ne '-' && $chk_signoff && $signoff == 0) { ERROR("MISSING_SIGN_OFF", "Missing Signed-off-by: line(s)\n"); } -- cgit v1.1 From f1a63678554f8f7ff0425361b0142a69c0b815df Mon Sep 17 00:00:00 2001 From: Maxim Uvarov Date: Thu, 25 Jun 2015 15:03:08 -0700 Subject: checkpatch: remove local from codespell path local is typically used for manually installed apps. For apps installed from distro the right path is /usr/share. Signed-off-by: Maxim Uvarov Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 46ebc6a..ef24f92 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -50,7 +50,7 @@ my $minimum_perl_version = 5.10.0; my $min_conf_desc_length = 4; my $spelling_file = "$D/spelling.txt"; my $codespell = 0; -my $codespellfile = "/usr/local/share/codespell/dictionary.txt"; +my $codespellfile = "/usr/share/codespell/dictionary.txt"; my $color = 1; sub help { @@ -95,7 +95,7 @@ Options: --ignore-perl-version override checking of perl version. expect runtime errors. --codespell Use the codespell dictionary for spelling/typos - (default:/usr/local/share/codespell/dictionary.txt) + (default:/usr/share/codespell/dictionary.txt) --codespellfile Use this codespell dictionary --color Use colors when output is STDOUT (default: on) -h, --help, --version display this help and exit -- cgit v1.1 From 06330fc40e3f3034de4934ec347604b5e36c40c3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:03:11 -0700 Subject: checkpatch: avoid NOT_UNIFIED_DIFF errors on cover-letter.patch files Make an exception for the "Does not appear to be a unified-diff" error when scanning what appears to be git generated cover letters. Signed-off-by: Joe Perches Suggested-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index ef24f92..69c4716 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5614,7 +5614,7 @@ sub process { exit(0); } - if (!$is_patch) { + if (!$is_patch && $file !~ /cover-letter\.patch$/) { ERROR("NOT_UNIFIED_DIFF", "Does not appear to be a unified-diff format patch\n"); } -- cgit v1.1 From b6117d175be9972fc300f826e6f2bf9c589e0919 Mon Sep 17 00:00:00 2001 From: Mateusz Kulikowski Date: Thu, 25 Jun 2015 15:03:13 -0700 Subject: checkpatch: suggest using ether_addr_equal*() Check if memcmp() is used to compare ethernet addresses and suggest using ether_addr_equal() or ether_addr_equal_unaligned() Signed-off-by: Mateusz Kulikowski Acked-by: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 69c4716..f04fe88 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5157,6 +5157,14 @@ sub process { } } +# Check for memcmp(foo, bar, ETH_ALEN) that could be ether_addr_equal*(foo, bar) + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+(?:.*?)\bmemcmp\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) { + WARN("PREFER_ETHER_ADDR_EQUAL", + "Prefer ether_addr_equal() or ether_addr_equal_unaligned() over memcmp()\n" . "$here\n$stat\n") + } + # typecasts on min/max could be min_t/max_t if ($^V && $^V ge 5.10.0 && defined $stat && -- cgit v1.1 From 9e20a8535f3fdd88afe9fe17ae85c36bd37f4e71 Mon Sep 17 00:00:00 2001 From: Mateusz Kulikowski Date: Thu, 25 Jun 2015 15:03:16 -0700 Subject: checkpatch: fix processing of MEMSET issues Remove 's' modifier to avoid reporting the same warning several times. Signed-off-by: Mateusz Kulikowski Acked-by: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index f04fe88..954f491 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5132,7 +5132,7 @@ sub process { # Check for misused memsets if ($^V && $^V ge 5.10.0 && defined $stat && - $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*$FuncArg\s*\)/s) { + $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*$FuncArg\s*\)/) { my $ms_addr = $2; my $ms_val = $7; -- cgit v1.1 From 8617cd09bc874dd7204b83aa3ed5fdc38b79562f Mon Sep 17 00:00:00 2001 From: Mateusz Kulikowski Date: Thu, 25 Jun 2015 15:03:19 -0700 Subject: checkpatch: suggest using eth_zero_addr() and eth_broadcast_addr() Suggest using eth_zero_addr() or eth_broadcast_addr() instead of memset(). Signed-off-by: Mateusz Kulikowski Acked-by: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 954f491..a81fc66 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5165,6 +5165,29 @@ sub process { "Prefer ether_addr_equal() or ether_addr_equal_unaligned() over memcmp()\n" . "$here\n$stat\n") } +# check for memset(foo, 0x0, ETH_ALEN) that could be eth_zero_addr +# check for memset(foo, 0xFF, ETH_ALEN) that could be eth_broadcast_addr + if ($^V && $^V ge 5.10.0 && + defined $stat && + $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) { + + my $ms_val = $7; + + if ($ms_val =~ /^(?:0x|)0+$/i) { + if (WARN("PREFER_ETH_ZERO_ADDR", + "Prefer eth_zero_addr over memset()\n" . "$here\n$stat\n") && + $fix) { + $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_zero_addr($2)/; + } + } elsif ($ms_val =~ /^(?:0xff|255)$/i) { + if (WARN("PREFER_ETH_BROADCAST_ADDR", + "Prefer eth_broadcast_addr() over memset()\n" . "$here\n$stat\n") && + $fix) { + $fixed[$fixlinenr] =~ s/\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*,\s*ETH_ALEN\s*\)/eth_broadcast_addr($2)/; + } + } + } + # typecasts on min/max could be min_t/max_t if ($^V && $^V ge 5.10.0 && defined $stat && -- cgit v1.1 From 10895d2c82cce9650f7d34c81c1bd29958e15293 Mon Sep 17 00:00:00 2001 From: Mateusz Kulikowski Date: Thu, 25 Jun 2015 15:03:21 -0700 Subject: checkpatch: add multi-line handling for PREFER_ETHER_ADDR_COPY Handle multi-line memcpy() properly. Signed-off-by: Mateusz Kulikowski Acked-by: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a81fc66..dfeb553 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5149,9 +5149,10 @@ sub process { # Check for memcpy(foo, bar, ETH_ALEN) that could be ether_addr_copy(foo, bar) if ($^V && $^V ge 5.10.0 && - $line =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/s) { + defined $stat && + $stat =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) { if (WARN("PREFER_ETHER_ADDR_COPY", - "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . $herecurr) && + "Prefer ether_addr_copy() over memcpy() if the Ethernet addresses are __aligned(2)\n" . "$here\n$stat\n") && $fix) { $fixed[$fixlinenr] =~ s/\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/ether_addr_copy($2, $7)/; } -- cgit v1.1 From 5a6d20ce19b770c9946281783614294b3f570ab8 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Thu, 25 Jun 2015 15:03:24 -0700 Subject: checkpatch: validate MODULE_LICENSE content There is a well defined list of expected values for MODULE_LICENSE so warn the user upon usage of unknown values. Signed-off-by: Bjorn Andersson Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index dfeb553..4cf4473 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5626,6 +5626,24 @@ sub process { } } } + +# validate content of MODULE_LICENSE against list from include/linux/module.h + if ($line =~ /\bMODULE_LICENSE\s*\(\s*($String)\s*\)/) { + my $extracted_string = get_quoted_string($line, $rawline); + my $valid_licenses = qr{ + GPL| + GPL\ v2| + GPL\ and\ additional\ rights| + Dual\ BSD/GPL| + Dual\ MIT/GPL| + Dual\ MPL/GPL| + Proprietary + }x; + if ($extracted_string !~ /^"(?:$valid_licenses)"$/x) { + WARN("MODULE_LICENSE", + "unknown module license " . $extracted_string . "\n" . $herecurr); + } + } } # If we have no input at all, then there is nothing to report on -- cgit v1.1 From e518e9a59ec37a323b0f4785e2311a1ec1433c6d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:03:27 -0700 Subject: checkpatch: emit an error when there's a diff in a changelog People often put diff snippets in changelogs. This causes problems when one tries to apply a file containing both the changelog and the diff because patch(1) tries to apply the diff which it found in the changelog. Warn once when what seems to be a diff snippet in the changelog exists. Signed-off-by: Joe Perches Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 4cf4473..daf466d 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1954,6 +1954,7 @@ sub process { my $in_header_lines = $file ? 0 : 1; my $in_commit_log = 0; #Scanning lines before patch my $commit_log_long_line = 0; + my $commit_log_has_diff = 0; my $reported_maintainer_file = 0; my $non_utf8_charset = 0; @@ -2087,7 +2088,8 @@ sub process { my $rawline = $rawlines[$linenr - 1]; #extract the line range in the file after the patch is applied - if ($line=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { + if (!$in_commit_log && + $line =~ /^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) { $is_patch = 1; $first_line = $linenr + 1; $realline=$1-1; @@ -2181,6 +2183,17 @@ sub process { $cnt_lines++ if ($realcnt != 0); +# Check if the commit log has what seems like a diff which can confuse patch + if ($in_commit_log && !$commit_log_has_diff && + (($line =~ m@^\s+diff\b.*a/[\w/]+@ && + $line =~ m@^\s+diff\b.*a/([\w/]+)\s+b/$1\b@) || + $line =~ m@^\s*(?:\-\-\-\s+a/|\+\+\+\s+b/)@ || + $line =~ m/^\s*\@\@ \-\d+,\d+ \+\d+,\d+ \@\@/)) { + ERROR("DIFF_IN_COMMIT_MSG", + "Avoid using diff content in the commit message - patch(1) might not work\n" . $herecurr); + $commit_log_has_diff = 1; + } + # Check for incorrect file permissions if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) { my $permhere = $here . "FILE: $realfile\n"; -- cgit v1.1 From 3c816e490ca48af65ccb420e2462ab7344879e0c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 25 Jun 2015 15:03:29 -0700 Subject: checkpatch: emit "NOTE: " message only once after multiple files Make this message similar to the "false positives" message and emit it only once when scanning multiple files instead of after each file scanned. Signed-off-by: Joe Perches Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index daf466d..90e1edc 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -203,7 +203,7 @@ sub hash_save_array_words { sub hash_show_words { my ($hashRef, $prefix) = @_; - if ($quiet == 0 && keys %$hashRef) { + if (keys %$hashRef) { print "\nNOTE: $prefix message types:"; foreach my $word (sort keys %$hashRef) { print " $word"; @@ -770,6 +770,9 @@ for my $filename (@ARGV) { } if (!$quiet) { + hash_show_words(\%use_type, "Used"); + hash_show_words(\%ignore_type, "Ignored"); + if ($^V lt 5.10.0) { print << "EOM" @@ -5707,9 +5710,6 @@ EOM } } - hash_show_words(\%use_type, "Used"); - hash_show_words(\%ignore_type, "Ignored"); - if ($clean == 0 && $fix && ("@rawlines" ne "@fixed" || $#fixed_inserted >= 0 || $#fixed_deleted >= 0)) { -- cgit v1.1 From 35a4c902f61fc262aba03d5e8381334683e4d4aa Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 25 Jun 2015 15:03:32 -0700 Subject: fs/efs: femove unneeded cast kmem_cache_alloc() returns void*. Signed-off-by: Firo Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/efs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/efs/super.c b/fs/efs/super.c index 7fca462..c8411a3 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -67,7 +67,7 @@ static struct kmem_cache * efs_inode_cachep; static struct inode *efs_alloc_inode(struct super_block *sb) { struct efs_inode_info *ei; - ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; -- cgit v1.1 From bffacb9132a306b7e22bb6366e5b277f20f67465 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Thu, 25 Jun 2015 15:03:35 -0700 Subject: kasan: remove duplicate definition of the macro KASAN_FREE_PAGE Remove duplicate definition of the macro KASAN_FREE_PAGE in mm/kasan/kasan.h Signed-off-by: Wang Long Acked-by: Andrey Ryabinin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 4986b0a..c242adf 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -7,7 +7,6 @@ #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) #define KASAN_FREE_PAGE 0xFF /* page was freed */ -#define KASAN_FREE_PAGE 0xFF /* page was freed */ #define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ -- cgit v1.1 From c69e3c3a0c014e86750e78b7e2ae823f7a9b2cb2 Mon Sep 17 00:00:00 2001 From: Vishnu Pratap Singh Date: Thu, 25 Jun 2015 15:03:37 -0700 Subject: init/do_mounts.c: add create_dev() failure log If create_dev() function fails to create the root mount device (/dev/root), then it goes to panic as root device not found but there is no printk in this case. So I have added the log in case it fails to create the root device. It will help in debugging. [akpm@linux-foundation.org: simplify printk(), use pr_emerg(), display errno] Signed-off-by: Vishnu Pratap Singh Acked-by: Pavel Machek Cc: Paul Gortmaker Cc: Mike Snitzer Cc: Dan Ehrenberg Cc: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/do_mounts.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/init/do_mounts.c b/init/do_mounts.c index a95bbdb..dea5de9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -533,8 +533,13 @@ void __init mount_root(void) } #endif #ifdef CONFIG_BLOCK - create_dev("/dev/root", ROOT_DEV); - mount_block_root("/dev/root", root_mountflags); + { + int err = create_dev("/dev/root", ROOT_DEV); + + if (err < 0) + pr_emerg("Failed to create /dev/root: %d\n", err); + mount_block_root("/dev/root", root_mountflags); + } #endif } -- cgit v1.1 From 83b08bf7defa702cc6ef975fb05c437cd331826c Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 25 Jun 2015 15:03:40 -0700 Subject: fs/minix: remove unneeded cast kmem_cache_alloc() returns void*. Signed-off-by: Firo Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/minix/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 1182d1e..086cd0a 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -62,7 +62,7 @@ static struct kmem_cache * minix_inode_cachep; static struct inode *minix_alloc_inode(struct super_block *sb) { struct minix_inode_info *ei; - ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; -- cgit v1.1 From 9f0f564abbd8812edeef2c839ab3e6ba16595491 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 25 Jun 2015 15:03:43 -0700 Subject: fs/befs/btree.c: remove unneeded initializations bh, od_sup and this_node are unconditionally initialized in befs_bt_read_super() and befs_btree_find() Signed-off-by: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/befs/btree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/befs/btree.c b/fs/befs/btree.c index 0826e91..22c16628 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c @@ -137,8 +137,8 @@ static int befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, befs_btree_super * sup) { - struct buffer_head *bh = NULL; - befs_disk_btree_super *od_sup = NULL; + struct buffer_head *bh; + befs_disk_btree_super *od_sup; befs_debug(sb, "---> %s", __func__); @@ -250,7 +250,7 @@ int befs_btree_find(struct super_block *sb, befs_data_stream * ds, const char *key, befs_off_t * value) { - struct befs_btree_node *this_node = NULL; + struct befs_btree_node *this_node; befs_btree_super bt_super; befs_off_t node_off; int res; -- cgit v1.1 From f73c2f1f83ca1c4e2f4515f987973bfe56c86973 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Jun 2015 15:03:45 -0700 Subject: NILFS2: support NFSv2 export The "fh_len" passed to ->fh_to_* is not guaranteed to be that same as that returned by encode_fh - it may be larger. With NFSv2, the filehandle is fixed length, so it may appear longer than expected and be zero-padded. So we must test that fh_len is at least some value, not exactly equal to it. Signed-off-by: NeilBrown Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/namei.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 2218083..37dd6b0 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -496,8 +496,7 @@ static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh, { struct nilfs_fid *fid = (struct nilfs_fid *)fh; - if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE && - fh_len != NILFS_FID_SIZE_CONNECTABLE) || + if (fh_len < NILFS_FID_SIZE_NON_CONNECTABLE || (fh_type != FILEID_NILFS_WITH_PARENT && fh_type != FILEID_NILFS_WITHOUT_PARENT)) return NULL; @@ -510,7 +509,7 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, { struct nilfs_fid *fid = (struct nilfs_fid *)fh; - if (fh_len != NILFS_FID_SIZE_CONNECTABLE || + if (fh_len < NILFS_FID_SIZE_CONNECTABLE || fh_type != FILEID_NILFS_WITH_PARENT) return NULL; -- cgit v1.1 From 89bfae43c8999344c3025802a3652ac42f55278e Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 25 Jun 2015 15:03:48 -0700 Subject: fs/reiserfs: remove unneeded cast kmem_cache_alloc() returns void*. Signed-off-by: Firo Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0111ad0..d766bfa 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -588,8 +588,7 @@ static struct kmem_cache *reiserfs_inode_cachep; static struct inode *reiserfs_alloc_inode(struct super_block *sb) { struct reiserfs_inode_info *ei; - ei = (struct reiserfs_inode_info *) - kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); + ei = kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; atomic_set(&ei->openers, 0); -- cgit v1.1 From 5202efe544c279be152f44f2821010ff7b2d7e5b Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Thu, 25 Jun 2015 15:03:51 -0700 Subject: coredump: use from_kuid/kgid when formatting corename When adding __printf attribute to cn_printf, gcc reports some issues: fs/coredump.c:213:5: warning: format '%d' expects argument of type 'int', but argument 3 has type 'kuid_t' [-Wformat=] err = cn_printf(cn, "%d", cred->uid); ^ fs/coredump.c:217:5: warning: format '%d' expects argument of type 'int', but argument 3 has type 'kgid_t' [-Wformat=] err = cn_printf(cn, "%d", cred->gid); ^ These warnings come from the fact that the value of uid/gid needs to be extracted from the kuid_t/kgid_t structure before being used as an integer. More precisely, cred->uid and cred->gid need to be converted to either user-namespace uid/gid or to init_user_ns uid/gid. Use init_user_ns in order not to break existing ABI, and document this in Documentation/sysctl/kernel.txt. While at it, format uid and gid values with %u instead of %d because uid_t/__kernel_uid32_t and gid_t/__kernel_gid32_t are unsigned int. Signed-off-by: Nicolas Iooss Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 4 ++-- fs/coredump.c | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index e5d528e..6fccb69 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -197,8 +197,8 @@ core_pattern is used to specify a core dumpfile pattern name. %P global pid (init PID namespace) %i tid %I global tid (init PID namespace) - %u uid - %g gid + %u uid (in initial user namespace) + %g gid (in initial user namespace) %d dump mode, matches PR_SET_DUMPABLE and /proc/sys/fs/suid_dumpable %s signal number diff --git a/fs/coredump.c b/fs/coredump.c index bbbe139..833a57b 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -209,11 +209,15 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) break; /* uid */ case 'u': - err = cn_printf(cn, "%d", cred->uid); + err = cn_printf(cn, "%u", + from_kuid(&init_user_ns, + cred->uid)); break; /* gid */ case 'g': - err = cn_printf(cn, "%d", cred->gid); + err = cn_printf(cn, "%u", + from_kgid(&init_user_ns, + cred->gid)); break; case 'd': err = cn_printf(cn, "%d", -- cgit v1.1 From b4176b7c135ed28e78752552358919e5b14ad2bf Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Thu, 25 Jun 2015 15:03:53 -0700 Subject: coredump: add __printf attribute to cn_*printf functions This allows detecting improper format string at build time, like: fs/coredump.c:225:5: warning: format '%ld' expects argument of type 'long int', but argument 3 has type 'int' [-Wformat=] err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); ^ As si_signo is always an int, the format should be %d here. Signed-off-by: Nicolas Iooss Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 833a57b..e52e006 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -70,7 +70,8 @@ static int expand_corename(struct core_name *cn, int size) return 0; } -static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg) +static __printf(2, 0) int cn_vprintf(struct core_name *cn, const char *fmt, + va_list arg) { int free, need; va_list arg_copy; @@ -93,7 +94,7 @@ again: return -ENOMEM; } -static int cn_printf(struct core_name *cn, const char *fmt, ...) +static __printf(2, 3) int cn_printf(struct core_name *cn, const char *fmt, ...) { va_list arg; int ret; @@ -105,7 +106,8 @@ static int cn_printf(struct core_name *cn, const char *fmt, ...) return ret; } -static int cn_esc_printf(struct core_name *cn, const char *fmt, ...) +static __printf(2, 3) +int cn_esc_printf(struct core_name *cn, const char *fmt, ...) { int cur = cn->used; va_list arg; @@ -225,7 +227,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm) break; /* signal that caused the coredump */ case 's': - err = cn_printf(cn, "%ld", cprm->siginfo->si_signo); + err = cn_printf(cn, "%d", + cprm->siginfo->si_signo); break; /* UNIX time of coredump */ case 't': { -- cgit v1.1 From 51229b495340bd7a02ce3622d1966829b67054ea Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 25 Jun 2015 15:03:56 -0700 Subject: exit,stats: /* obey this comment */ There is a helpful comment in do_exit() that states we sync the mm's RSS info before statistics gathering. The function that does the statistics gathering is called right above that comment. Change the code to obey the comment. Signed-off-by: Rik van Riel Cc: Oleg Nesterov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/exit.c b/kernel/exit.c index 185752a..031325e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -711,10 +711,10 @@ void do_exit(long code) current->comm, task_pid_nr(current), preempt_count()); - acct_update_integrals(tsk); /* sync mm's RSS info before statistics gathering */ if (tsk->mm) sync_mm_rss(tsk->mm); + acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); -- cgit v1.1