diff options
Diffstat (limited to 'include')
36 files changed, 673 insertions, 203 deletions
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index eb1973b..5e1f345 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -98,14 +98,14 @@ ATOMIC_LONG_ADD_SUB_OP(sub, _release) #define atomic_long_xchg(v, new) \ (ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new))) -static inline void atomic_long_inc(atomic_long_t *l) +static __always_inline void atomic_long_inc(atomic_long_t *l) { ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; ATOMIC_LONG_PFX(_inc)(v); } -static inline void atomic_long_dec(atomic_long_t *l) +static __always_inline void atomic_long_dec(atomic_long_t *l) { ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; @@ -113,7 +113,7 @@ static inline void atomic_long_dec(atomic_long_t *l) } #define ATOMIC_LONG_OP(op) \ -static inline void \ +static __always_inline void \ atomic_long_##op(long i, atomic_long_t *l) \ { \ ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 630dd23..f90588a 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -81,6 +81,12 @@ extern void warn_slowpath_null(const char *file, const int line); do { printk(arg); __WARN_TAINT(taint); } while (0) #endif +/* used internally by panic.c */ +struct warn_args; + +void __warn(const char *file, int line, void *caller, unsigned taint, + struct pt_regs *regs, struct warn_args *args); + #ifndef WARN_ON #define WARN_ON(condition) ({ \ int __ret_warn_on = !!(condition); \ @@ -110,9 +116,10 @@ extern void warn_slowpath_null(const char *file, const int line); static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ - if (unlikely(__ret_warn_once)) \ - if (WARN_ON(!__warned)) \ - __warned = true; \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + WARN_ON(1); \ + } \ unlikely(__ret_warn_once); \ }) @@ -120,9 +127,10 @@ extern void warn_slowpath_null(const char *file, const int line); static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ - if (unlikely(__ret_warn_once)) \ - if (WARN(!__warned, format)) \ - __warned = true; \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + WARN(1, format); \ + } \ unlikely(__ret_warn_once); \ }) @@ -130,9 +138,10 @@ extern void warn_slowpath_null(const char *file, const int line); static bool __section(.data.unlikely) __warned; \ int __ret_warn_once = !!(condition); \ \ - if (unlikely(__ret_warn_once)) \ - if (WARN_TAINT(!__warned, taint, format)) \ - __warned = true; \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + WARN_TAINT(1, taint, format); \ + } \ unlikely(__ret_warn_once); \ }) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index c370b26..9401f48 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -783,6 +783,23 @@ static inline int pmd_clear_huge(pmd_t *pmd) } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ +#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* + * ARCHes with special requirements for evicting THP backing TLB entries can + * implement this. Otherwise also, it can help optimize normal TLB flush in + * THP regime. stock flush_tlb_range() typically has optimization to nuke the + * entire TLB TLB if flush span is greater than a threshold, which will + * likely be true for a single huge page. Thus a single thp flush will + * invalidate the entire TLB which is not desitable. + * e.g. see arch/arc: flush_pmd_tlb_range + */ +#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) +#else +#define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG() +#endif +#endif + #endif /* !__ASSEMBLY__ */ #ifndef io_remap_pfn_range diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 89d9aa9e..c67f052 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -82,15 +82,15 @@ struct buffer_head { * and buffer_foo() functions. */ #define BUFFER_FNS(bit, name) \ -static inline void set_buffer_##name(struct buffer_head *bh) \ +static __always_inline void set_buffer_##name(struct buffer_head *bh) \ { \ set_bit(BH_##bit, &(bh)->b_state); \ } \ -static inline void clear_buffer_##name(struct buffer_head *bh) \ +static __always_inline void clear_buffer_##name(struct buffer_head *bh) \ { \ clear_bit(BH_##bit, &(bh)->b_state); \ } \ -static inline int buffer_##name(const struct buffer_head *bh) \ +static __always_inline int buffer_##name(const struct buffer_head *bh) \ { \ return test_bit(BH_##bit, &(bh)->b_state); \ } @@ -99,11 +99,11 @@ static inline int buffer_##name(const struct buffer_head *bh) \ * test_set_buffer_foo() and test_clear_buffer_foo() */ #define TAS_BUFFER_FNS(bit, name) \ -static inline int test_set_buffer_##name(struct buffer_head *bh) \ +static __always_inline int test_set_buffer_##name(struct buffer_head *bh) \ { \ return test_and_set_bit(BH_##bit, &(bh)->b_state); \ } \ -static inline int test_clear_buffer_##name(struct buffer_head *bh) \ +static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ { \ return test_and_clear_bit(BH_##bit, &(bh)->b_state); \ } \ diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 4cd4ddf..d7c8de5 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -52,6 +52,10 @@ extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); extern bool compaction_restarting(struct zone *zone, int order); +extern int kcompactd_run(int nid); +extern void kcompactd_stop(int nid); +extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); + #else static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, int alloc_flags, @@ -84,6 +88,18 @@ static inline bool compaction_deferred(struct zone *zone, int order) return true; } +static inline int kcompactd_run(int nid) +{ + return 0; +} +static inline void kcompactd_stop(int nid) +{ +} + +static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx) +{ +} + #endif /* CONFIG_COMPACTION */ #if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 6b7fd9c..dd03e83 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -231,7 +231,7 @@ static inline long freezable_schedule_timeout_killable_unsafe(long timeout) * call this with locks held. */ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires, - unsigned long delta, const enum hrtimer_mode mode) + u64 delta, const enum hrtimer_mode mode) { int __retval; freezer_do_not_count(); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bb16dfe..570383a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -105,8 +105,6 @@ struct vm_area_struct; * * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. * This takes precedence over the __GFP_MEMALLOC flag if both are set. - * - * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement. */ #define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) @@ -259,7 +257,7 @@ struct vm_area_struct; #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ - ~__GFP_KSWAPD_RECLAIM) + ~__GFP_RECLAIM) /* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -333,22 +331,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) * - * ZONES_SHIFT must be <= 2 on 32 bit platforms. + * GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms. */ -#if 16 * ZONES_SHIFT > BITS_PER_LONG -#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer +#if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4 +/* ZONE_DEVICE is not a valid GFP zone specifier */ +#define GFP_ZONES_SHIFT 2 +#else +#define GFP_ZONES_SHIFT ZONES_SHIFT +#endif + +#if 16 * GFP_ZONES_SHIFT > BITS_PER_LONG +#error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer #endif #define GFP_ZONE_TABLE ( \ - (ZONE_NORMAL << 0 * ZONES_SHIFT) \ - | (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT) \ - | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT) \ - | (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT) \ - | (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT) \ - | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT) \ - | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT) \ - | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT) \ + (ZONE_NORMAL << 0 * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_DMA32 << ___GFP_DMA32 * GFP_ZONES_SHIFT) \ + | (ZONE_NORMAL << ___GFP_MOVABLE * GFP_ZONES_SHIFT) \ + | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT) \ + | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)\ + | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)\ ) /* @@ -373,8 +378,8 @@ static inline enum zone_type gfp_zone(gfp_t flags) enum zone_type z; int bit = (__force int) (flags & GFP_ZONEMASK); - z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) & - ((1 << ZONES_SHIFT) - 1); + z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) & + ((1 << GFP_ZONES_SHIFT) - 1); VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; } diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 2ead22d..c98c653 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -220,7 +220,7 @@ static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time timer->node.expires = ktime_add_safe(time, delta); } -static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) +static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, u64 delta) { timer->_softexpires = time; timer->node.expires = ktime_add_safe(time, ns_to_ktime(delta)); @@ -378,7 +378,7 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } /* Basic timer operations: */ extern void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, - unsigned long range_ns, const enum hrtimer_mode mode); + u64 range_ns, const enum hrtimer_mode mode); /** * hrtimer_start - (re)start an hrtimer on the current CPU @@ -399,7 +399,7 @@ extern int hrtimer_try_to_cancel(struct hrtimer *timer); static inline void hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) { - unsigned long delta; + u64 delta; ktime_t soft, hard; soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); @@ -477,10 +477,12 @@ extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); -extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, +extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode); extern int schedule_hrtimeout_range_clock(ktime_t *expires, - unsigned long delta, const enum hrtimer_mode mode, int clock); + u64 delta, + const enum hrtimer_mode mode, + int clock); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 459fd25..5307dfb 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -41,7 +41,8 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *, enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, - TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, @@ -71,12 +72,6 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); ((__vma)->vm_flags & VM_HUGEPAGE))) && \ !((__vma)->vm_flags & VM_NOHUGEPAGE) && \ !is_vma_temporary_stack(__vma)) -#define transparent_hugepage_defrag(__vma) \ - ((transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \ - (__vma)->vm_flags & VM_HUGEPAGE)) #define transparent_hugepage_use_zero_page() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) @@ -101,16 +96,21 @@ static inline int split_huge_page(struct page *page) void deferred_split_huge_page(struct page *page); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long address); + unsigned long address, bool freeze); #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ if (pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ - __split_huge_pmd(__vma, __pmd, __address); \ + __split_huge_pmd(__vma, __pmd, __address, \ + false); \ } while (0) + +void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, + bool freeze, struct page *page); + #if HPAGE_PMD_ORDER >= MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif @@ -178,6 +178,10 @@ static inline int split_huge_page(struct page *page) static inline void deferred_split_huge_page(struct page *page) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) + +static inline void split_huge_pmd_address(struct vm_area_struct *vma, + unsigned long address, bool freeze, struct page *page) {} + static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f31638c..f4fa2b2 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,6 +357,7 @@ int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); +int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); @@ -368,6 +369,7 @@ int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigne int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); +int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index ee7229a..cb48330 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -48,7 +48,7 @@ static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) #define hlist_bl_entry(ptr, type, member) container_of(ptr,type,member) -static inline int hlist_bl_unhashed(const struct hlist_bl_node *h) +static inline bool hlist_bl_unhashed(const struct hlist_bl_node *h) { return !h->pprev; } @@ -68,7 +68,7 @@ static inline void hlist_bl_set_first(struct hlist_bl_head *h, h->first = (struct hlist_bl_node *)((unsigned long)n | LIST_BL_LOCKMASK); } -static inline int hlist_bl_empty(const struct hlist_bl_head *h) +static inline bool hlist_bl_empty(const struct hlist_bl_head *h) { return !((unsigned long)READ_ONCE(h->first) & ~LIST_BL_LOCKMASK); } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index f0c4bec..1191d79 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -52,7 +52,10 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, /* default hierarchy stats */ - MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS, + MEMCG_KERNEL_STACK = MEM_CGROUP_STAT_NSTATS, + MEMCG_SLAB_RECLAIMABLE, + MEMCG_SLAB_UNRECLAIMABLE, + MEMCG_SOCK, MEMCG_NR_STAT, }; @@ -400,6 +403,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, int nr_pages); +unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, + int nid, unsigned int lru_mask); + static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { @@ -658,6 +664,13 @@ mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, { } +static inline unsigned long +mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, + int nid, unsigned int lru_mask) +{ + return 0; +} + static inline void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { @@ -792,11 +805,6 @@ static inline bool memcg_kmem_enabled(void) return static_branch_unlikely(&memcg_kmem_enabled_key); } -static inline bool memcg_kmem_online(struct mem_cgroup *memcg) -{ - return memcg->kmem_state == KMEM_ONLINE; -} - /* * In general, we'll do everything in our power to not incur in any overhead * for non-memcg users for the kmem functions. Not even a function call, if we @@ -883,6 +891,20 @@ static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) if (memcg_kmem_enabled()) __memcg_kmem_put_cache(cachep); } + +/** + * memcg_kmem_update_page_stat - update kmem page state statistics + * @page: the page + * @idx: page state item to account + * @val: number of pages (positive or negative) + */ +static inline void memcg_kmem_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) +{ + if (memcg_kmem_enabled() && page->mem_cgroup) + this_cpu_add(page->mem_cgroup->stat->count[idx], val); +} + #else #define for_each_memcg_cache_index(_idx) \ for (; NULL; ) @@ -892,11 +914,6 @@ static inline bool memcg_kmem_enabled(void) return false; } -static inline bool memcg_kmem_online(struct mem_cgroup *memcg) -{ - return false; -} - static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { return 0; @@ -928,6 +945,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) static inline void memcg_kmem_put_cache(struct kmem_cache *cachep) { } + +static inline void memcg_kmem_update_page_stat(struct page *page, + enum mem_cgroup_stat_index idx, int val) +{ +} #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index dbf1edd..7d42501 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -22,6 +22,7 @@ #include <linux/resource.h> #include <linux/page_ext.h> #include <linux/err.h> +#include <linux/page_ref.h> struct mempolicy; struct anon_vma; @@ -82,6 +83,27 @@ extern int mmap_rnd_compat_bits __read_mostly; #define mm_forbids_zeropage(X) (0) #endif +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + * + * ELF extended numbering allows more than 65535 sections, so 16-bit bound is + * not a hard limit any more. Although some userspace tools can be surprised by + * that. + */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) + +extern int sysctl_max_map_count; + extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; @@ -122,6 +144,7 @@ extern unsigned int kobjsize(const void *objp); /* * vm_flags in vm_area_struct, see mm_types.h. + * When changing, update also include/trace/events/mmflags.h */ #define VM_NONE 0x00000000 @@ -364,8 +387,8 @@ static inline int pmd_devmap(pmd_t pmd) */ static inline int put_page_testzero(struct page *page) { - VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page); - return atomic_dec_and_test(&page->_count); + VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); + return page_ref_dec_and_test(page); } /* @@ -376,7 +399,7 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { - return atomic_inc_not_zero(&page->_count); + return page_ref_add_unless(page, 1, 0); } extern int page_is_ram(unsigned long pfn); @@ -464,11 +487,6 @@ static inline int total_mapcount(struct page *page) } #endif -static inline int page_count(struct page *page) -{ - return atomic_read(&compound_head(page)->_count); -} - static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); @@ -476,15 +494,6 @@ static inline struct page *virt_to_head_page(const void *x) return compound_head(page); } -/* - * Setup the page count before being freed into the page allocator for - * the first time (boot or memory hotplug) - */ -static inline void init_page_count(struct page *page) -{ - atomic_set(&page->_count, 1); -} - void __put_page(struct page *page); void put_pages_list(struct list_head *pages); @@ -694,8 +703,8 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); - atomic_inc(&page->_count); + VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); + page_ref_inc(page); if (unlikely(is_zone_device_page(page))) get_zone_device_page(page); @@ -1043,8 +1052,6 @@ static inline void clear_page_pfmemalloc(struct page *page) * just gets major/minor fault counters bumped up. */ -#define VM_FAULT_MINOR 0 /* For backwards compat. Remove me quickly. */ - #define VM_FAULT_OOM 0x0001 #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 @@ -1523,8 +1530,7 @@ static inline void mm_dec_nr_pmds(struct mm_struct *mm) } #endif -int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, - pmd_t *pmd, unsigned long address); +int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); /* @@ -1650,15 +1656,15 @@ static inline void pgtable_page_dtor(struct page *page) pte_unmap(pte); \ } while (0) -#define pte_alloc_map(mm, vma, pmd, address) \ - ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \ - pmd, address))? \ - NULL: pte_offset_map(pmd, address)) +#define pte_alloc(mm, pmd, address) \ + (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd, address)) + +#define pte_alloc_map(mm, pmd, address) \ + (pte_alloc(mm, pmd, address) ? NULL : pte_offset_map(pmd, address)) #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ - ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \ - pmd, address))? \ - NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) + (pte_alloc(mm, pmd, address) ? \ + NULL : pte_offset_map_lock(mm, pmd, address, ptlp)) #define pte_alloc_kernel(pmd, address) \ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ @@ -1853,6 +1859,7 @@ extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); extern void __init mmap_init(void); extern void show_mem(unsigned int flags); +extern long si_mem_available(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); @@ -1867,6 +1874,7 @@ extern void zone_pcp_reset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; +extern int watermark_scale_factor; /* nommu.c */ extern atomic_long_t mmap_pages_allocated; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6de02ac3..c60df92 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -668,6 +668,12 @@ typedef struct pglist_data { mem_hotplug_begin/end() */ int kswapd_max_order; enum zone_type classzone_idx; +#ifdef CONFIG_COMPACTION + int kcompactd_max_order; + enum zone_type kcompactd_classzone_idx; + wait_queue_head_t kcompactd_wait; + struct task_struct *kcompactd; +#endif #ifdef CONFIG_NUMA_BALANCING /* Lock serializing the migrate rate limiting window */ spinlock_t numabalancing_migrate_lock; @@ -835,6 +841,8 @@ static inline int is_highmem(struct zone *zone) struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, + void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1]; int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index da52366..77b078c 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -17,6 +17,8 @@ #define ZONES_SHIFT 1 #elif MAX_NR_ZONES <= 4 #define ZONES_SHIFT 2 +#elif MAX_NR_ZONES <= 8 +#define ZONES_SHIFT 3 #else #error ZONES_SHIFT -- too many zones configured adjust calculation #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 19724e6..f4ed4f1b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -144,12 +144,12 @@ static inline struct page *compound_head(struct page *page) return page; } -static inline int PageTail(struct page *page) +static __always_inline int PageTail(struct page *page) { return READ_ONCE(page->compound_head) & 1; } -static inline int PageCompound(struct page *page) +static __always_inline int PageCompound(struct page *page) { return test_bit(PG_head, &page->flags) || PageTail(page); } @@ -184,31 +184,31 @@ static inline int PageCompound(struct page *page) * Macros to create function definitions for page flags */ #define TESTPAGEFLAG(uname, lname, policy) \ -static inline int Page##uname(struct page *page) \ +static __always_inline int Page##uname(struct page *page) \ { return test_bit(PG_##lname, &policy(page, 0)->flags); } #define SETPAGEFLAG(uname, lname, policy) \ -static inline void SetPage##uname(struct page *page) \ +static __always_inline void SetPage##uname(struct page *page) \ { set_bit(PG_##lname, &policy(page, 1)->flags); } #define CLEARPAGEFLAG(uname, lname, policy) \ -static inline void ClearPage##uname(struct page *page) \ +static __always_inline void ClearPage##uname(struct page *page) \ { clear_bit(PG_##lname, &policy(page, 1)->flags); } #define __SETPAGEFLAG(uname, lname, policy) \ -static inline void __SetPage##uname(struct page *page) \ +static __always_inline void __SetPage##uname(struct page *page) \ { __set_bit(PG_##lname, &policy(page, 1)->flags); } #define __CLEARPAGEFLAG(uname, lname, policy) \ -static inline void __ClearPage##uname(struct page *page) \ +static __always_inline void __ClearPage##uname(struct page *page) \ { __clear_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTSETFLAG(uname, lname, policy) \ -static inline int TestSetPage##uname(struct page *page) \ +static __always_inline int TestSetPage##uname(struct page *page) \ { return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); } #define TESTCLEARFLAG(uname, lname, policy) \ -static inline int TestClearPage##uname(struct page *page) \ +static __always_inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); } #define PAGEFLAG(uname, lname, policy) \ @@ -371,7 +371,7 @@ PAGEFLAG(Idle, idle, PF_ANY) #define PAGE_MAPPING_KSM 2 #define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) -static inline int PageAnon(struct page *page) +static __always_inline int PageAnon(struct page *page) { page = compound_head(page); return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; @@ -384,7 +384,7 @@ static inline int PageAnon(struct page *page) * is found in VM_MERGEABLE vmas. It's a PageAnon page, pointing not to any * anon_vma, but to that page's node of the stable tree. */ -static inline int PageKsm(struct page *page) +static __always_inline int PageKsm(struct page *page) { page = compound_head(page); return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) == @@ -415,14 +415,14 @@ static inline int PageUptodate(struct page *page) return ret; } -static inline void __SetPageUptodate(struct page *page) +static __always_inline void __SetPageUptodate(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); smp_wmb(); __set_bit(PG_uptodate, &page->flags); } -static inline void SetPageUptodate(struct page *page) +static __always_inline void SetPageUptodate(struct page *page) { VM_BUG_ON_PAGE(PageTail(page), page); /* @@ -456,12 +456,12 @@ static inline void set_page_writeback_keepwrite(struct page *page) __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY) -static inline void set_compound_head(struct page *page, struct page *head) +static __always_inline void set_compound_head(struct page *page, struct page *head) { WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } -static inline void clear_compound_head(struct page *page) +static __always_inline void clear_compound_head(struct page *page) { WRITE_ONCE(page->compound_head, 0); } @@ -593,6 +593,8 @@ static inline void __ClearPageBuddy(struct page *page) atomic_set(&page->_mapcount, -1); } +extern bool is_free_buddy_page(struct page *page); + #define PAGE_BALLOON_MAPCOUNT_VALUE (-256) static inline int PageBalloon(struct page *page) diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h new file mode 100644 index 0000000..e596d5d9 --- /dev/null +++ b/include/linux/page_ref.h @@ -0,0 +1,173 @@ +#ifndef _LINUX_PAGE_REF_H +#define _LINUX_PAGE_REF_H + +#include <linux/atomic.h> +#include <linux/mm_types.h> +#include <linux/page-flags.h> +#include <linux/tracepoint-defs.h> + +extern struct tracepoint __tracepoint_page_ref_set; +extern struct tracepoint __tracepoint_page_ref_mod; +extern struct tracepoint __tracepoint_page_ref_mod_and_test; +extern struct tracepoint __tracepoint_page_ref_mod_and_return; +extern struct tracepoint __tracepoint_page_ref_mod_unless; +extern struct tracepoint __tracepoint_page_ref_freeze; +extern struct tracepoint __tracepoint_page_ref_unfreeze; + +#ifdef CONFIG_DEBUG_PAGE_REF + +/* + * Ideally we would want to use the trace_<tracepoint>_enabled() helper + * functions. But due to include header file issues, that is not + * feasible. Instead we have to open code the static key functions. + * + * See trace_##name##_enabled(void) in include/linux/tracepoint.h + */ +#define page_ref_tracepoint_active(t) static_key_false(&(t).key) + +extern void __page_ref_set(struct page *page, int v); +extern void __page_ref_mod(struct page *page, int v); +extern void __page_ref_mod_and_test(struct page *page, int v, int ret); +extern void __page_ref_mod_and_return(struct page *page, int v, int ret); +extern void __page_ref_mod_unless(struct page *page, int v, int u); +extern void __page_ref_freeze(struct page *page, int v, int ret); +extern void __page_ref_unfreeze(struct page *page, int v); + +#else + +#define page_ref_tracepoint_active(t) false + +static inline void __page_ref_set(struct page *page, int v) +{ +} +static inline void __page_ref_mod(struct page *page, int v) +{ +} +static inline void __page_ref_mod_and_test(struct page *page, int v, int ret) +{ +} +static inline void __page_ref_mod_and_return(struct page *page, int v, int ret) +{ +} +static inline void __page_ref_mod_unless(struct page *page, int v, int u) +{ +} +static inline void __page_ref_freeze(struct page *page, int v, int ret) +{ +} +static inline void __page_ref_unfreeze(struct page *page, int v) +{ +} + +#endif + +static inline int page_ref_count(struct page *page) +{ + return atomic_read(&page->_count); +} + +static inline int page_count(struct page *page) +{ + return atomic_read(&compound_head(page)->_count); +} + +static inline void set_page_count(struct page *page, int v) +{ + atomic_set(&page->_count, v); + if (page_ref_tracepoint_active(__tracepoint_page_ref_set)) + __page_ref_set(page, v); +} + +/* + * Setup the page count before being freed into the page allocator for + * the first time (boot or memory hotplug) + */ +static inline void init_page_count(struct page *page) +{ + set_page_count(page, 1); +} + +static inline void page_ref_add(struct page *page, int nr) +{ + atomic_add(nr, &page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, nr); +} + +static inline void page_ref_sub(struct page *page, int nr) +{ + atomic_sub(nr, &page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, -nr); +} + +static inline void page_ref_inc(struct page *page) +{ + atomic_inc(&page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, 1); +} + +static inline void page_ref_dec(struct page *page) +{ + atomic_dec(&page->_count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod)) + __page_ref_mod(page, -1); +} + +static inline int page_ref_sub_and_test(struct page *page, int nr) +{ + int ret = atomic_sub_and_test(nr, &page->_count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) + __page_ref_mod_and_test(page, -nr, ret); + return ret; +} + +static inline int page_ref_dec_and_test(struct page *page) +{ + int ret = atomic_dec_and_test(&page->_count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_test)) + __page_ref_mod_and_test(page, -1, ret); + return ret; +} + +static inline int page_ref_dec_return(struct page *page) +{ + int ret = atomic_dec_return(&page->_count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) + __page_ref_mod_and_return(page, -1, ret); + return ret; +} + +static inline int page_ref_add_unless(struct page *page, int nr, int u) +{ + int ret = atomic_add_unless(&page->_count, nr, u); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_unless)) + __page_ref_mod_unless(page, nr, ret); + return ret; +} + +static inline int page_ref_freeze(struct page *page, int count) +{ + int ret = likely(atomic_cmpxchg(&page->_count, count, 0) == count); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_freeze)) + __page_ref_freeze(page, count, ret); + return ret; +} + +static inline void page_ref_unfreeze(struct page *page, int count) +{ + VM_BUG_ON_PAGE(page_count(page) != 0, page); + VM_BUG_ON(count == 0); + + atomic_set(&page->_count, count); + if (page_ref_tracepoint_active(__tracepoint_page_ref_unfreeze)) + __page_ref_unfreeze(page, count); +} + +#endif diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 183b15e..1ebd65c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -165,7 +165,7 @@ static inline int page_cache_get_speculative(struct page *page) * SMP requires. */ VM_BUG_ON_PAGE(page_count(page) == 0, page); - atomic_inc(&page->_count); + page_ref_inc(page); #else if (unlikely(!get_page_unless_zero(page))) { @@ -194,10 +194,10 @@ static inline int page_cache_add_speculative(struct page *page, int count) VM_BUG_ON(!in_atomic()); # endif VM_BUG_ON_PAGE(page_count(page) == 0, page); - atomic_add(count, &page->_count); + page_ref_add(page, count); #else - if (unlikely(!atomic_add_unless(&page->_count, count, 0))) + if (unlikely(!page_ref_add_unless(page, count, 0))) return 0; #endif VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page); @@ -205,19 +205,6 @@ static inline int page_cache_add_speculative(struct page *page, int count) return 1; } -static inline int page_freeze_refs(struct page *page, int count) -{ - return likely(atomic_cmpxchg(&page->_count, count, 0) == count); -} - -static inline void page_unfreeze_refs(struct page *page, int count) -{ - VM_BUG_ON_PAGE(page_count(page) != 0, page); - VM_BUG_ON(count == 0); - - atomic_set(&page->_count, count); -} - #ifdef CONFIG_NUMA extern struct page *__page_cache_alloc(gfp_t gfp); #else diff --git a/include/linux/poll.h b/include/linux/poll.h index c08386f..9fb4f40 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -96,7 +96,7 @@ extern void poll_initwait(struct poll_wqueues *pwq); extern void poll_freewait(struct poll_wqueues *pwq); extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); -extern long select_estimate_accuracy(struct timespec *tv); +extern u64 select_estimate_accuracy(struct timespec *tv); static inline int poll_schedule(struct poll_wqueues *pwq, int state) diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h index bd46643..3bdfa70 100644 --- a/include/linux/quicklist.h +++ b/include/linux/quicklist.h @@ -5,7 +5,7 @@ * as needed after allocation when they are freed. Per cpu lists of pages * are kept that only contain node local pages. * - * (C) 2007, SGI. Christoph Lameter <clameter@sgi.com> + * (C) 2007, SGI. Christoph Lameter <cl@linux.com> */ #include <linux/kernel.h> #include <linux/gfp.h> diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index f54be70..51a97ac 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -21,6 +21,7 @@ #ifndef _LINUX_RADIX_TREE_H #define _LINUX_RADIX_TREE_H +#include <linux/bitops.h> #include <linux/preempt.h> #include <linux/types.h> #include <linux/bug.h> @@ -270,8 +271,15 @@ static inline void radix_tree_replace_slot(void **pslot, void *item) } int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - struct radix_tree_node **nodep, void ***slotp); -int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); + unsigned order, struct radix_tree_node **nodep, + void ***slotp); +int __radix_tree_insert(struct radix_tree_root *, unsigned long index, + unsigned order, void *); +static inline int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *entry) +{ + return __radix_tree_insert(root, index, 0, entry); +} void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, struct radix_tree_node **nodep, void ***slotp); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); @@ -395,6 +403,22 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) } /** + * radix_tree_iter_next - resume iterating when the chunk may be invalid + * @iter: iterator state + * + * If the iterator needs to release then reacquire a lock, the chunk may + * have been invalidated by an insertion or deletion. Call this function + * to continue the iteration from the next index. + */ +static inline __must_check +void **radix_tree_iter_next(struct radix_tree_iter *iter) +{ + iter->next_index = iter->index + 1; + iter->tags = 0; + return NULL; +} + +/** * radix_tree_chunk_size - get current chunk size * * @iter: pointer to radix tree iterator diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a07f42b..49eb4f8 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -86,6 +86,7 @@ enum ttu_flags { TTU_MIGRATION = 2, /* migration mode */ TTU_MUNLOCK = 4, /* munlock mode */ TTU_LZFREE = 8, /* lazy free mode */ + TTU_SPLIT_HUGE_PMD = 16, /* split huge PMD if any */ TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ @@ -93,6 +94,8 @@ enum ttu_flags { TTU_BATCH_FLUSH = (1 << 11), /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ + TTU_RMAP_LOCKED = (1 << 12) /* do not grab rmap lock: + * caller holds it */ }; #ifdef CONFIG_MMU @@ -240,6 +243,8 @@ int page_mkclean(struct page *); */ int try_to_munlock(struct page *); +void remove_migration_ptes(struct page *old, struct page *new, bool locked); + /* * Called by memory-failure.c to kill processes. */ @@ -266,6 +271,7 @@ struct rmap_walk_control { }; int rmap_walk(struct page *page, struct rmap_walk_control *rwc); +int rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ diff --git a/include/linux/sched.h b/include/linux/sched.h index bd242be..084ed9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1791,8 +1791,8 @@ struct task_struct { * time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. */ - unsigned long timer_slack_ns; - unsigned long default_timer_slack_ns; + u64 timer_slack_ns; + u64 default_timer_slack_ns; #ifdef CONFIG_KASAN unsigned int kasan_depth; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 4f080ab..22db1e6 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -14,27 +14,6 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, enum { sysctl_hung_task_timeout_secs = 0 }; #endif -/* - * Default maximum number of active map areas, this limits the number of vmas - * per mm struct. Users can overwrite this number by sysctl but there is a - * problem. - * - * When a program's coredump is generated as ELF format, a section is created - * per a vma. In ELF, the number of sections is represented in unsigned short. - * This means the number of sections should be smaller than 65535 at coredump. - * Because the kernel adds some informative sections to a image of program at - * generating coredump, we need some margin. The number of extra sections is - * 1-3 now and depends on arch. We use "5" as safe margin, here. - * - * ELF extended numbering allows more than 65535 sections, so 16-bit bound is - * not a hard limit any more. Although some userspace tools can be surprised by - * that. - */ -#define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) - -extern int sysctl_max_map_count; - extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; diff --git a/include/linux/string.h b/include/linux/string.h index 9eebc66..d3993a7 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -128,7 +128,13 @@ extern char **argv_split(gfp_t gfp, const char *str, int *argcp); extern void argv_free(char **argv); extern bool sysfs_streq(const char *s1, const char *s2); -extern int strtobool(const char *s, bool *res); +extern int kstrtobool(const char *s, bool *res); +static inline int strtobool(const char *s, bool *res) +{ + return kstrtobool(s, res); +} + +int match_string(const char * const *array, size_t n, const char *string); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); diff --git a/include/linux/tick.h b/include/linux/tick.h index 21f7364..62be0786 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -111,7 +111,7 @@ enum tick_dep_bits { #define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE) #ifdef CONFIG_NO_HZ_COMMON -extern int tick_nohz_enabled; +extern bool tick_nohz_enabled; extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); diff --git a/include/linux/unaligned/access_ok.h b/include/linux/unaligned/access_ok.h index 99c1b4d..33383ca 100644 --- a/include/linux/unaligned/access_ok.h +++ b/include/linux/unaligned/access_ok.h @@ -4,62 +4,62 @@ #include <linux/kernel.h> #include <asm/byteorder.h> -static inline u16 get_unaligned_le16(const void *p) +static __always_inline u16 get_unaligned_le16(const void *p) { return le16_to_cpup((__le16 *)p); } -static inline u32 get_unaligned_le32(const void *p) +static __always_inline u32 get_unaligned_le32(const void *p) { return le32_to_cpup((__le32 *)p); } -static inline u64 get_unaligned_le64(const void *p) +static __always_inline u64 get_unaligned_le64(const void *p) { return le64_to_cpup((__le64 *)p); } -static inline u16 get_unaligned_be16(const void *p) +static __always_inline u16 get_unaligned_be16(const void *p) { return be16_to_cpup((__be16 *)p); } -static inline u32 get_unaligned_be32(const void *p) +static __always_inline u32 get_unaligned_be32(const void *p) { return be32_to_cpup((__be32 *)p); } -static inline u64 get_unaligned_be64(const void *p) +static __always_inline u64 get_unaligned_be64(const void *p) { return be64_to_cpup((__be64 *)p); } -static inline void put_unaligned_le16(u16 val, void *p) +static __always_inline void put_unaligned_le16(u16 val, void *p) { *((__le16 *)p) = cpu_to_le16(val); } -static inline void put_unaligned_le32(u32 val, void *p) +static __always_inline void put_unaligned_le32(u32 val, void *p) { *((__le32 *)p) = cpu_to_le32(val); } -static inline void put_unaligned_le64(u64 val, void *p) +static __always_inline void put_unaligned_le64(u64 val, void *p) { *((__le64 *)p) = cpu_to_le64(val); } -static inline void put_unaligned_be16(u16 val, void *p) +static __always_inline void put_unaligned_be16(u16 val, void *p) { *((__be16 *)p) = cpu_to_be16(val); } -static inline void put_unaligned_be32(u32 val, void *p) +static __always_inline void put_unaligned_be32(u32 val, void *p) { *((__be32 *)p) = cpu_to_be32(val); } -static inline void put_unaligned_be64(u64 val, void *p) +static __always_inline void put_unaligned_be64(u64 val, void *p) { *((__be64 *)p) = cpu_to_be64(val); } diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 67c1dbd..ec08432 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -53,6 +53,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED, COMPACTISOLATED, COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS, + KCOMPACTD_WAKE, #endif #ifdef CONFIG_HUGETLB_PAGE HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL, @@ -71,6 +72,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_COLLAPSE_ALLOC_FAILED, THP_SPLIT_PAGE, THP_SPLIT_PAGE_FAILED, + THP_DEFERRED_SPLIT_PAGE, THP_SPLIT_PMD, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 111e566..e215bf6 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -350,6 +350,61 @@ DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset, ); #endif +TRACE_EVENT(mm_compaction_kcompactd_sleep, + + TP_PROTO(int nid), + + TP_ARGS(nid), + + TP_STRUCT__entry( + __field(int, nid) + ), + + TP_fast_assign( + __entry->nid = nid; + ), + + TP_printk("nid=%d", __entry->nid) +); + +DECLARE_EVENT_CLASS(kcompactd_wake_template, + + TP_PROTO(int nid, int order, enum zone_type classzone_idx), + + TP_ARGS(nid, order, classzone_idx), + + TP_STRUCT__entry( + __field(int, nid) + __field(int, order) + __field(enum zone_type, classzone_idx) + ), + + TP_fast_assign( + __entry->nid = nid; + __entry->order = order; + __entry->classzone_idx = classzone_idx; + ), + + TP_printk("nid=%d order=%d classzone_idx=%-8s", + __entry->nid, + __entry->order, + __print_symbolic(__entry->classzone_idx, ZONE_TYPE)) +); + +DEFINE_EVENT(kcompactd_wake_template, mm_compaction_wakeup_kcompactd, + + TP_PROTO(int nid, int order, enum zone_type classzone_idx), + + TP_ARGS(nid, order, classzone_idx) +); + +DEFINE_EVENT(kcompactd_wake_template, mm_compaction_kcompactd_wake, + + TP_PROTO(int nid, int order, enum zone_type classzone_idx), + + TP_ARGS(nid, order, classzone_idx) +); + #endif /* _TRACE_COMPACTION_H */ /* This part must be outside protection */ diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index a849185..43cedbf0c 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -111,15 +111,21 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) ) : "none" #if defined(CONFIG_X86) -#define __VM_ARCH_SPECIFIC {VM_PAT, "pat" } +#define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) -#define __VM_ARCH_SPECIFIC {VM_SAO, "sao" } +#define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64) -#define __VM_ARCH_SPECIFIC {VM_GROWSUP, "growsup" } +#define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } #elif !defined(CONFIG_MMU) -#define __VM_ARCH_SPECIFIC {VM_MAPPED_COPY,"mappedcopy" } +#define __VM_ARCH_SPECIFIC_1 {VM_MAPPED_COPY,"mappedcopy" } #else -#define __VM_ARCH_SPECIFIC {VM_ARCH_1, "arch_1" } +#define __VM_ARCH_SPECIFIC_1 {VM_ARCH_1, "arch_1" } +#endif + +#if defined(CONFIG_X86) +#define __VM_ARCH_SPECIFIC_2 {VM_MPX, "mpx" } +#else +#define __VM_ARCH_SPECIFIC_2 {VM_ARCH_2, "arch_2" } #endif #ifdef CONFIG_MEM_SOFT_DIRTY @@ -138,19 +144,22 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" ) {VM_MAYEXEC, "mayexec" }, \ {VM_MAYSHARE, "mayshare" }, \ {VM_GROWSDOWN, "growsdown" }, \ + {VM_UFFD_MISSING, "uffd_missing" }, \ {VM_PFNMAP, "pfnmap" }, \ {VM_DENYWRITE, "denywrite" }, \ - {VM_LOCKONFAULT, "lockonfault" }, \ + {VM_UFFD_WP, "uffd_wp" }, \ {VM_LOCKED, "locked" }, \ {VM_IO, "io" }, \ {VM_SEQ_READ, "seqread" }, \ {VM_RAND_READ, "randread" }, \ {VM_DONTCOPY, "dontcopy" }, \ {VM_DONTEXPAND, "dontexpand" }, \ + {VM_LOCKONFAULT, "lockonfault" }, \ {VM_ACCOUNT, "account" }, \ {VM_NORESERVE, "noreserve" }, \ {VM_HUGETLB, "hugetlb" }, \ - __VM_ARCH_SPECIFIC , \ + __VM_ARCH_SPECIFIC_1 , \ + __VM_ARCH_SPECIFIC_2 , \ {VM_DONTDUMP, "dontdump" }, \ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ {VM_MIXEDMAP, "mixedmap" }, \ diff --git a/include/trace/events/page_ref.h b/include/trace/events/page_ref.h new file mode 100644 index 0000000..81001f8 --- /dev/null +++ b/include/trace/events/page_ref.h @@ -0,0 +1,134 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM page_ref + +#if !defined(_TRACE_PAGE_REF_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_REF_H + +#include <linux/types.h> +#include <linux/page_ref.h> +#include <linux/tracepoint.h> +#include <trace/events/mmflags.h> + +DECLARE_EVENT_CLASS(page_ref_mod_template, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(unsigned long, flags) + __field(int, count) + __field(int, mapcount) + __field(void *, mapping) + __field(int, mt) + __field(int, val) + ), + + TP_fast_assign( + __entry->pfn = page_to_pfn(page); + __entry->flags = page->flags; + __entry->count = page_ref_count(page); + __entry->mapcount = page_mapcount(page); + __entry->mapping = page->mapping; + __entry->mt = get_pageblock_migratetype(page); + __entry->val = v; + ), + + TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d", + __entry->pfn, + show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)), + __entry->count, + __entry->mapcount, __entry->mapping, __entry->mt, + __entry->val) +); + +DEFINE_EVENT(page_ref_mod_template, page_ref_set, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v) +); + +DEFINE_EVENT(page_ref_mod_template, page_ref_mod, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v) +); + +DECLARE_EVENT_CLASS(page_ref_mod_and_test_template, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(unsigned long, flags) + __field(int, count) + __field(int, mapcount) + __field(void *, mapping) + __field(int, mt) + __field(int, val) + __field(int, ret) + ), + + TP_fast_assign( + __entry->pfn = page_to_pfn(page); + __entry->flags = page->flags; + __entry->count = page_ref_count(page); + __entry->mapcount = page_mapcount(page); + __entry->mapping = page->mapping; + __entry->mt = get_pageblock_migratetype(page); + __entry->val = v; + __entry->ret = ret; + ), + + TP_printk("pfn=0x%lx flags=%s count=%d mapcount=%d mapping=%p mt=%d val=%d ret=%d", + __entry->pfn, + show_page_flags(__entry->flags & ((1UL << NR_PAGEFLAGS) - 1)), + __entry->count, + __entry->mapcount, __entry->mapping, __entry->mt, + __entry->val, __entry->ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_test, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_and_return, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_mod_unless, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_and_test_template, page_ref_freeze, + + TP_PROTO(struct page *page, int v, int ret), + + TP_ARGS(page, v, ret) +); + +DEFINE_EVENT(page_ref_mod_template, page_ref_unfreeze, + + TP_PROTO(struct page *page, int v), + + TP_ARGS(page, v) +); + +#endif /* _TRACE_PAGE_COUNT_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/uapi/linux/byteorder/big_endian.h b/include/uapi/linux/byteorder/big_endian.h index 6723744..cdab17a 100644 --- a/include/uapi/linux/byteorder/big_endian.h +++ b/include/uapi/linux/byteorder/big_endian.h @@ -40,51 +40,51 @@ #define __cpu_to_be16(x) ((__force __be16)(__u16)(x)) #define __be16_to_cpu(x) ((__force __u16)(__be16)(x)) -static inline __le64 __cpu_to_le64p(const __u64 *p) +static __always_inline __le64 __cpu_to_le64p(const __u64 *p) { return (__force __le64)__swab64p(p); } -static inline __u64 __le64_to_cpup(const __le64 *p) +static __always_inline __u64 __le64_to_cpup(const __le64 *p) { return __swab64p((__u64 *)p); } -static inline __le32 __cpu_to_le32p(const __u32 *p) +static __always_inline __le32 __cpu_to_le32p(const __u32 *p) { return (__force __le32)__swab32p(p); } -static inline __u32 __le32_to_cpup(const __le32 *p) +static __always_inline __u32 __le32_to_cpup(const __le32 *p) { return __swab32p((__u32 *)p); } -static inline __le16 __cpu_to_le16p(const __u16 *p) +static __always_inline __le16 __cpu_to_le16p(const __u16 *p) { return (__force __le16)__swab16p(p); } -static inline __u16 __le16_to_cpup(const __le16 *p) +static __always_inline __u16 __le16_to_cpup(const __le16 *p) { return __swab16p((__u16 *)p); } -static inline __be64 __cpu_to_be64p(const __u64 *p) +static __always_inline __be64 __cpu_to_be64p(const __u64 *p) { return (__force __be64)*p; } -static inline __u64 __be64_to_cpup(const __be64 *p) +static __always_inline __u64 __be64_to_cpup(const __be64 *p) { return (__force __u64)*p; } -static inline __be32 __cpu_to_be32p(const __u32 *p) +static __always_inline __be32 __cpu_to_be32p(const __u32 *p) { return (__force __be32)*p; } -static inline __u32 __be32_to_cpup(const __be32 *p) +static __always_inline __u32 __be32_to_cpup(const __be32 *p) { return (__force __u32)*p; } -static inline __be16 __cpu_to_be16p(const __u16 *p) +static __always_inline __be16 __cpu_to_be16p(const __u16 *p) { return (__force __be16)*p; } -static inline __u16 __be16_to_cpup(const __be16 *p) +static __always_inline __u16 __be16_to_cpup(const __be16 *p) { return (__force __u16)*p; } diff --git a/include/uapi/linux/byteorder/little_endian.h b/include/uapi/linux/byteorder/little_endian.h index d876736..4b93f2b 100644 --- a/include/uapi/linux/byteorder/little_endian.h +++ b/include/uapi/linux/byteorder/little_endian.h @@ -40,51 +40,51 @@ #define __cpu_to_be16(x) ((__force __be16)__swab16((x))) #define __be16_to_cpu(x) __swab16((__force __u16)(__be16)(x)) -static inline __le64 __cpu_to_le64p(const __u64 *p) +static __always_inline __le64 __cpu_to_le64p(const __u64 *p) { return (__force __le64)*p; } -static inline __u64 __le64_to_cpup(const __le64 *p) +static __always_inline __u64 __le64_to_cpup(const __le64 *p) { return (__force __u64)*p; } -static inline __le32 __cpu_to_le32p(const __u32 *p) +static __always_inline __le32 __cpu_to_le32p(const __u32 *p) { return (__force __le32)*p; } -static inline __u32 __le32_to_cpup(const __le32 *p) +static __always_inline __u32 __le32_to_cpup(const __le32 *p) { return (__force __u32)*p; } -static inline __le16 __cpu_to_le16p(const __u16 *p) +static __always_inline __le16 __cpu_to_le16p(const __u16 *p) { return (__force __le16)*p; } -static inline __u16 __le16_to_cpup(const __le16 *p) +static __always_inline __u16 __le16_to_cpup(const __le16 *p) { return (__force __u16)*p; } -static inline __be64 __cpu_to_be64p(const __u64 *p) +static __always_inline __be64 __cpu_to_be64p(const __u64 *p) { return (__force __be64)__swab64p(p); } -static inline __u64 __be64_to_cpup(const __be64 *p) +static __always_inline __u64 __be64_to_cpup(const __be64 *p) { return __swab64p((__u64 *)p); } -static inline __be32 __cpu_to_be32p(const __u32 *p) +static __always_inline __be32 __cpu_to_be32p(const __u32 *p) { return (__force __be32)__swab32p(p); } -static inline __u32 __be32_to_cpup(const __be32 *p) +static __always_inline __u32 __be32_to_cpup(const __be32 *p) { return __swab32p((__u32 *)p); } -static inline __be16 __cpu_to_be16p(const __u16 *p) +static __always_inline __be16 __cpu_to_be16p(const __u16 *p) { return (__force __be16)__swab16p(p); } -static inline __u16 __be16_to_cpup(const __be16 *p) +static __always_inline __u16 __be16_to_cpup(const __be16 *p) { return __swab16p((__u16 *)p); } diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index b56dfcf..c3fdfe7 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -30,7 +30,6 @@ #define EM_X86_64 62 /* AMD x86-64 */ #define EM_S390 22 /* IBM S/390 */ #define EM_CRIS 76 /* Axis Communications 32-bit embedded processor */ -#define EM_V850 87 /* NEC v850 */ #define EM_M32R 88 /* Renesas M32R */ #define EM_MN10300 89 /* Panasonic/MEI MN10300, AM33 */ #define EM_OPENRISC 92 /* OpenRISC 32-bit embedded processor */ @@ -50,8 +49,6 @@ */ #define EM_ALPHA 0x9026 -/* Bogus old v850 magic number, used by old tools. */ -#define EM_CYGNUS_V850 0x9080 /* Bogus old m32r magic number, used by old tools. */ #define EM_CYGNUS_M32R 0x9041 /* This is the old interim value for S/390 architecture */ diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 0e011eb..3f10e53 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -151,7 +151,7 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) * __swab16p - return a byteswapped 16-bit value from a pointer * @p: pointer to a naturally-aligned 16-bit value */ -static inline __u16 __swab16p(const __u16 *p) +static __always_inline __u16 __swab16p(const __u16 *p) { #ifdef __arch_swab16p return __arch_swab16p(p); @@ -164,7 +164,7 @@ static inline __u16 __swab16p(const __u16 *p) * __swab32p - return a byteswapped 32-bit value from a pointer * @p: pointer to a naturally-aligned 32-bit value */ -static inline __u32 __swab32p(const __u32 *p) +static __always_inline __u32 __swab32p(const __u32 *p) { #ifdef __arch_swab32p return __arch_swab32p(p); @@ -177,7 +177,7 @@ static inline __u32 __swab32p(const __u32 *p) * __swab64p - return a byteswapped 64-bit value from a pointer * @p: pointer to a naturally-aligned 64-bit value */ -static inline __u64 __swab64p(const __u64 *p) +static __always_inline __u64 __swab64p(const __u64 *p) { #ifdef __arch_swab64p return __arch_swab64p(p); @@ -232,7 +232,7 @@ static inline void __swab16s(__u16 *p) * __swab32s - byteswap a 32-bit value in-place * @p: pointer to a naturally-aligned 32-bit value */ -static inline void __swab32s(__u32 *p) +static __always_inline void __swab32s(__u32 *p) { #ifdef __arch_swab32s __arch_swab32s(p); @@ -245,7 +245,7 @@ static inline void __swab32s(__u32 *p) * __swab64s - byteswap a 64-bit value in-place * @p: pointer to a naturally-aligned 64-bit value */ -static inline void __swab64s(__u64 *p) +static __always_inline void __swab64s(__u64 *p) { #ifdef __arch_swab64s __arch_swab64s(p); diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h index d7f1cbc..343d7dd 100644 --- a/include/uapi/linux/virtio_balloon.h +++ b/include/uapi/linux/virtio_balloon.h @@ -51,7 +51,8 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */ #define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ -#define VIRTIO_BALLOON_S_NR 6 +#define VIRTIO_BALLOON_S_AVAIL 6 /* Available memory as in /proc */ +#define VIRTIO_BALLOON_S_NR 7 /* * Memory statistics structure. |