From a9aec5881b9d4aca184b29d33484a6a58d23f7f2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 20 Jan 2016 14:58:35 -0800 Subject: lib/iomap_copy.c: add __ioread32_copy() Some drivers need to read data out of iomem areas 32-bits at a time. Add an API to do this. Signed-off-by: Stephen Boyd Cc: Bjorn Andersson Cc: Cc: David Howells Cc: Hauke Mehrtens Cc: Paul Walmsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/io.h b/include/linux/io.h index fffd88d..32403b5 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -29,6 +29,7 @@ struct device; struct resource; __visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count); +void __ioread32_copy(void *to, const void __iomem *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU -- cgit v1.1 From 243c2137cda52599f6112f52b6be5e61fa6536ae Mon Sep 17 00:00:00 2001 From: Adam Barth Date: Wed, 20 Jan 2016 14:59:09 -0800 Subject: include/linux/radix-tree.h: fix error in docs about locks This text refers to the "first 7 functions", which was correct when written but became incorrect when Johannes Weiner added another function to the list in 139e561660fe ("lib: radix_tree: tree node interface"). Change the text to correctly refer to the first 8 functions. Signed-off-by: Adam Barth Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 33170db..57e7d87 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -154,7 +154,7 @@ do { \ * radix_tree_gang_lookup_tag_slot * radix_tree_tagged * - * The first 7 functions are able to be called locklessly, using RCU. The + * The first 8 functions are able to be called locklessly, using RCU. The * caller must ensure calls to these functions are made within rcu_read_lock() * regions. Other readers (lock-free or otherwise) and modifications may be * running concurrently. -- cgit v1.1 From df0108c5da561c66c333bb46bfe3c1fc65905898 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Wed, 20 Jan 2016 14:59:24 -0800 Subject: epoll: add EPOLLEXCLUSIVE flag Currently, epoll file descriptors or epfds (the fd returned from epoll_create[1]()) that are added to a shared wakeup source are always added in a non-exclusive manner. This means that when we have multiple epfds attached to a shared fd source they are all woken up. This creates thundering herd type behavior. Introduce a new 'EPOLLEXCLUSIVE' flag that can be passed as part of the 'event' argument during an epoll_ctl() EPOLL_CTL_ADD operation. This new flag allows for exclusive wakeups when there are multiple epfds attached to a shared fd event source. The implementation walks the list of exclusive waiters, and queues an event to each epfd, until it finds the first waiter that has threads blocked on it via epoll_wait(). The idea is to search for threads which are idle and ready to process the wakeup events. Thus, we queue an event to at least 1 epfd, but may still potentially queue an event to all epfds that are attached to the shared fd source. Performance testing was done by Madars Vitolins using a modified version of Enduro/X. The use of the 'EPOLLEXCLUSIVE' flag reduce the length of this particular workload from 860s down to 24s. Sample epoll_clt text: EPOLLEXCLUSIVE Sets an exclusive wakeup mode for the epfd file descriptor that is being attached to the target file descriptor, fd. Thus, when an event occurs and multiple epfd file descriptors are attached to the same target file using EPOLLEXCLUSIVE, one or more epfds will receive an event with epoll_wait(2). The default in this scenario (when EPOLLEXCLUSIVE is not set) is for all epfds to receive an event. EPOLLEXCLUSIVE may only be specified with the op EPOLL_CTL_ADD. Signed-off-by: Jason Baron Tested-by: Madars Vitolins Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Al Viro Cc: Michael Kerrisk Cc: Eric Wong Cc: Jonathan Corbet Cc: Andy Lutomirski Cc: Hagen Paul Pfeifer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/eventpoll.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index bc81fb2..1c31549 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -26,6 +26,9 @@ #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 +/* Set exclusive wakeup mode for the target file descriptor */ +#define EPOLLEXCLUSIVE (1 << 28) + /* * Request the handling of system wakeup events so as to prevent system suspends * from happening while those events are being processed. -- cgit v1.1 From caaee6234d05a58c5b4d05e7bf766131b810a657 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Jan 2016 15:00:04 -0800 Subject: ptrace: use fsuid, fsgid, effective creds for fs access checks By checking the effective credentials instead of the real UID / permitted capabilities, ensure that the calling process actually intended to use its credentials. To ensure that all ptrace checks use the correct caller credentials (e.g. in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS flag), use two new flags and require one of them to be set. The problem was that when a privileged task had temporarily dropped its privileges, e.g. by calling setreuid(0, user_uid), with the intent to perform following syscalls with the credentials of a user, it still passed ptrace access checks that the user would not be able to pass. While an attacker should not be able to convince the privileged task to perform a ptrace() syscall, this is a problem because the ptrace access check is reused for things in procfs. In particular, the following somewhat interesting procfs entries only rely on ptrace access checks: /proc/$pid/stat - uses the check for determining whether pointers should be visible, useful for bypassing ASLR /proc/$pid/maps - also useful for bypassing ASLR /proc/$pid/cwd - useful for gaining access to restricted directories that contain files with lax permissions, e.g. in this scenario: lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar drwx------ root root /root drwxr-xr-x root root /root/foobar -rw-r--r-- root root /root/foobar/secret Therefore, on a system where a root-owned mode 6755 binary changes its effective credentials as described and then dumps a user-specified file, this could be used by an attacker to reveal the memory layout of root's processes or reveal the contents of files he is not allowed to access (through /proc/$pid/cwd). [akpm@linux-foundation.org: fix warning] Signed-off-by: Jann Horn Acked-by: Kees Cook Cc: Casey Schaufler Cc: Oleg Nesterov Cc: Ingo Molnar Cc: James Morris Cc: "Serge E. Hallyn" Cc: Andy Shevchenko Cc: Andy Lutomirski Cc: Al Viro Cc: "Eric W. Biederman" Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 061265f..504c98a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -57,7 +57,29 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 #define PTRACE_MODE_ATTACH 0x02 #define PTRACE_MODE_NOAUDIT 0x04 -/* Returns true on success, false on denial. */ +#define PTRACE_MODE_FSCREDS 0x08 +#define PTRACE_MODE_REALCREDS 0x10 + +/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ +#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) +#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) +#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) +#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) + +/** + * ptrace_may_access - check whether the caller is permitted to access + * a target task. + * @task: target task + * @mode: selects type of access and caller credentials + * + * Returns true on success, false on denial. + * + * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must + * be set in @mode to specify whether the access was requested through + * a filesystem syscall (should use effective capabilities and fsuid + * of the caller) or through an explicit syscall such as + * process_vm_writev or ptrace (and should use the real credentials). + */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); static inline int ptrace_reparented(struct task_struct *child) -- cgit v1.1 From 4b804c85dc37db6c108832b28cd54673ff7ee037 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Jan 2016 15:00:19 -0800 Subject: kernel/cpu.c: export __cpu_*_mask Exporting the cpumasks __cpu_possible_mask and friends will allow us to remove the extra indirection through the cpu_*_mask variables. It will also allow the set_cpu_* functions to become static inlines, which will give a .text reduction. Signed-off-by: Rasmus Villemoes Acked-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 59915ea..d4545a1 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -89,6 +89,10 @@ extern const struct cpumask *const cpu_possible_mask; extern const struct cpumask *const cpu_online_mask; extern const struct cpumask *const cpu_present_mask; extern const struct cpumask *const cpu_active_mask; +extern struct cpumask __cpu_possible_mask; +extern struct cpumask __cpu_online_mask; +extern struct cpumask __cpu_present_mask; +extern struct cpumask __cpu_active_mask; #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) -- cgit v1.1 From 5aec01b834fd6f8ca49d1aeede665b950d0c148e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Jan 2016 15:00:25 -0800 Subject: kernel/cpu.c: eliminate cpu_*_mask Replace the variables cpu_possible_mask, cpu_online_mask, cpu_present_mask and cpu_active_mask with macros expanding to expressions of the same type and value, eliminating some indirection. Signed-off-by: Rasmus Villemoes Acked-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d4545a1..52ab539 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -85,14 +85,14 @@ extern int nr_cpu_ids; * only one CPU. */ -extern const struct cpumask *const cpu_possible_mask; -extern const struct cpumask *const cpu_online_mask; -extern const struct cpumask *const cpu_present_mask; -extern const struct cpumask *const cpu_active_mask; extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; +#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) +#define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) +#define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) +#define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #if NR_CPUS > 1 #define num_online_cpus() cpumask_weight(cpu_online_mask) -- cgit v1.1 From 9425676a363c0976e3d43dda792dc4711a651d1d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 20 Jan 2016 15:00:28 -0800 Subject: kernel/cpu.c: make set_cpu_* static inlines Almost all callers of the set_cpu_* functions pass an explicit true or false. Making them static inline thus replaces the function calls with a simple set_bit/clear_bit, saving some .text. Signed-off-by: Rasmus Villemoes Acked-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 43 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 52ab539..fc14275 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -720,14 +720,49 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) /* Wrappers for arch boot code to manipulate normally-constant masks */ -void set_cpu_possible(unsigned int cpu, bool possible); -void set_cpu_present(unsigned int cpu, bool present); -void set_cpu_online(unsigned int cpu, bool online); -void set_cpu_active(unsigned int cpu, bool active); void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); +static inline void +set_cpu_possible(unsigned int cpu, bool possible) +{ + if (possible) + cpumask_set_cpu(cpu, &__cpu_possible_mask); + else + cpumask_clear_cpu(cpu, &__cpu_possible_mask); +} + +static inline void +set_cpu_present(unsigned int cpu, bool present) +{ + if (present) + cpumask_set_cpu(cpu, &__cpu_present_mask); + else + cpumask_clear_cpu(cpu, &__cpu_present_mask); +} + +static inline void +set_cpu_online(unsigned int cpu, bool online) +{ + if (online) { + cpumask_set_cpu(cpu, &__cpu_online_mask); + cpumask_set_cpu(cpu, &__cpu_active_mask); + } else { + cpumask_clear_cpu(cpu, &__cpu_online_mask); + } +} + +static inline void +set_cpu_active(unsigned int cpu, bool active) +{ + if (active) + cpumask_set_cpu(cpu, &__cpu_active_mask); + else + cpumask_clear_cpu(cpu, &__cpu_active_mask); +} + + /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap -- cgit v1.1 From 978e30c9b46161c792ecdad0091fd017b21b8ca5 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jan 2016 15:00:36 -0800 Subject: kexec: move some memembers and definitions within the scope of CONFIG_KEXEC_FILE Move the stuff currently only used by the kexec file code within CONFIG_KEXEC_FILE (and CONFIG_KEXEC_VERIFY_SIG). Also move internal "struct kexec_sha_region" and "struct kexec_buf" into "kexec_internal.h". Signed-off-by: Xunlei Pang Cc: "Eric W. Biederman" Cc: Dave Young Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kexec.h | 62 +++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 7b68d27..2cc643c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -109,11 +109,7 @@ struct compat_kexec_segment { }; #endif -struct kexec_sha_region { - unsigned long start; - unsigned long len; -}; - +#ifdef CONFIG_KEXEC_FILE struct purgatory_info { /* Pointer to elf header of read only purgatory */ Elf_Ehdr *ehdr; @@ -130,6 +126,28 @@ struct purgatory_info { unsigned long purgatory_load_addr; }; +typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size); +typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len); +typedef int (kexec_cleanup_t)(void *loader_data); + +#ifdef CONFIG_KEXEC_VERIFY_SIG +typedef int (kexec_verify_sig_t)(const char *kernel_buf, + unsigned long kernel_len); +#endif + +struct kexec_file_ops { + kexec_probe_t *probe; + kexec_load_t *load; + kexec_cleanup_t *cleanup; +#ifdef CONFIG_KEXEC_VERIFY_SIG + kexec_verify_sig_t *verify_sig; +#endif +}; +#endif + struct kimage { kimage_entry_t head; kimage_entry_t *entry; @@ -161,6 +179,7 @@ struct kimage { struct kimage_arch arch; #endif +#ifdef CONFIG_KEXEC_FILE /* Additional fields for file based kexec syscall */ void *kernel_buf; unsigned long kernel_buf_len; @@ -179,38 +198,7 @@ struct kimage { /* Information for loading purgatory */ struct purgatory_info purgatory_info; -}; - -/* - * Keeps track of buffer parameters as provided by caller for requesting - * memory placement of buffer. - */ -struct kexec_buf { - struct kimage *image; - char *buffer; - unsigned long bufsz; - unsigned long mem; - unsigned long memsz; - unsigned long buf_align; - unsigned long buf_min; - unsigned long buf_max; - bool top_down; /* allocate from top of memory hole */ -}; - -typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size); -typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf, - unsigned long kernel_len, char *initrd, - unsigned long initrd_len, char *cmdline, - unsigned long cmdline_len); -typedef int (kexec_cleanup_t)(void *loader_data); -typedef int (kexec_verify_sig_t)(const char *kernel_buf, - unsigned long kernel_len); - -struct kexec_file_ops { - kexec_probe_t *probe; - kexec_load_t *load; - kexec_cleanup_t *cleanup; - kexec_verify_sig_t *verify_sig; +#endif }; /* kexec interface functions */ -- cgit v1.1 From a460bece027301e079b9e53c5e0f67c8e3eaebc1 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 20 Jan 2016 15:00:42 -0800 Subject: rbtree: use READ_ONCE in RB_EMPTY_ROOT With commit d72da4a4d97 ("rbtree: Make lockless searches non-fatal") our rbtrees provide weak guarantees that allows us to do lockless (and very speculative) reads of the tree. Such readers cannot see partial stores on nodes, ie left/right as well as root. As such, similar to the WRITE_ONCE semantics when doing rotations, use READ_ONCE when checking the root node in RB_EMPTY_ROOT. Signed-off-by: Davidlohr Bueso Acked-by: Peter Zijlstra (Intel) Cc: Michel Lespinasse Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index a5aa7ae..b690009 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -50,7 +50,7 @@ struct rb_root { #define RB_ROOT (struct rb_root) { NULL, } #define rb_entry(ptr, type, member) container_of(ptr, type, member) -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) +#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) /* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ #define RB_EMPTY_NODE(node) \ -- cgit v1.1 From c6d308534aef6c99904bf5862066360ae067abc4 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 20 Jan 2016 15:00:55 -0800 Subject: UBSAN: run-time undefined behavior sanity checker UBSAN uses compile-time instrumentation to catch undefined behavior (UB). Compiler inserts code that perform certain kinds of checks before operations that could cause UB. If check fails (i.e. UB detected) __ubsan_handle_* function called to print error message. So the most of the work is done by compiler. This patch just implements ubsan handlers printing errors. GCC has this capability since 4.9.x [1] (see -fsanitize=undefined option and its suboptions). However GCC 5.x has more checkers implemented [2]. Article [3] has a bit more details about UBSAN in the GCC. [1] - https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html [2] - https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html [3] - http://developerblog.redhat.com/2014/10/16/gcc-undefined-behavior-sanitizer-ubsan/ Issues which UBSAN has found thus far are: Found bugs: * out-of-bounds access - 97840cb67ff5 ("netfilter: nfnetlink: fix insufficient validation in nfnetlink_bind") undefined shifts: * d48458d4a768 ("jbd2: use a better hash function for the revoke table") * 10632008b9e1 ("clockevents: Prevent shift out of bounds") * 'x << -1' shift in ext4 - http://lkml.kernel.org/r/<5444EF21.8020501@samsung.com> * undefined rol32(0) - http://lkml.kernel.org/r/<1449198241-20654-1-git-send-email-sasha.levin@oracle.com> * undefined dirty_ratelimit calculation - http://lkml.kernel.org/r/<566594E2.3050306@odin.com> * undefined roundown_pow_of_two(0) - http://lkml.kernel.org/r/<1449156616-11474-1-git-send-email-sasha.levin@oracle.com> * [WONTFIX] undefined shift in __bpf_prog_run - http://lkml.kernel.org/r/ WONTFIX here because it should be fixed in bpf program, not in kernel. signed overflows: * 32a8df4e0b33f ("sched: Fix odd values in effective_load() calculations") * mul overflow in ntp - http://lkml.kernel.org/r/<1449175608-1146-1-git-send-email-sasha.levin@oracle.com> * incorrect conversion into rtc_time in rtc_time64_to_tm() - http://lkml.kernel.org/r/<1449187944-11730-1-git-send-email-sasha.levin@oracle.com> * unvalidated timespec in io_getevents() - http://lkml.kernel.org/r/ * [NOTABUG] signed overflow in ktime_add_safe() - http://lkml.kernel.org/r/ [akpm@linux-foundation.org: fix unused local warning] [akpm@linux-foundation.org: fix __int128 build woes] Signed-off-by: Andrey Ryabinin Cc: Peter Zijlstra Cc: Sasha Levin Cc: Randy Dunlap Cc: Rasmus Villemoes Cc: Jonathan Corbet Cc: Michal Marek Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Yury Gribov Cc: Dmitry Vyukov Cc: Konstantin Khlebnikov Cc: Kostya Serebryany Cc: Johannes Berg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 61aa9bb..02dabf2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1643,6 +1643,9 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; gfp_t lockdep_reclaim_gfp; #endif +#ifdef CONFIG_UBSAN + unsigned int in_ubsan; +#endif /* journalling filesystem info */ void *journal_info; -- cgit v1.1 From 06af1c52c9ea234e0b1266cc0b52c3e0c6c8fe9f Mon Sep 17 00:00:00 2001 From: Bongkyu Kim Date: Wed, 20 Jan 2016 15:01:08 -0800 Subject: lz4: fix wrong compress buffer size for 64-bits The current lz4 compress buffer is 16kb on 32-bits, 32kb on 64-bits system. But, lz4 needs only 16kb on both. On 64-bits, this causes wasted cpu cycles for additional memset during every compression. In case of lz4hc, the current buffer size is (256kb + 8) on 32-bits, (512kb + 16) on 64-bits. But, lz4hc needs only (256kb + 2 * pointer) on both. This patch fixes these wrong compress buffer sizes for 64-bits. Signed-off-by: Bongkyu Kim Cc: Chanho Min Cc: Yann Collet Cc: Kyungsik Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 4356686..6b784c5 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -9,8 +9,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) -#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) +#define LZ4_MEM_COMPRESS (16384) +#define LZ4HC_MEM_COMPRESS (262144 + (2 * sizeof(unsigned char *))) /* * lz4_compressbound() -- cgit v1.1 From 2954e440be7305134be632a94536b412899490f7 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Wed, 20 Jan 2016 15:01:11 -0800 Subject: ipc/shm.c: is_file_shm_hugepages() can be boolean Make is_file_shm_hugepages() return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/shm.h b/include/linux/shm.h index 6fb8016..04e8818 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -52,7 +52,7 @@ struct sysv_shm { long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); -int is_file_shm_hugepages(struct file *file); +bool is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else @@ -66,9 +66,9 @@ static inline long do_shmat(int shmid, char __user *shmaddr, { return -ENOSYS; } -static inline int is_file_shm_hugepages(struct file *file) +static inline bool is_file_shm_hugepages(struct file *file) { - return 0; + return false; } static inline void exit_shm(struct task_struct *task) { -- cgit v1.1 From e1c7e324539ada3b2b13ca2898bcb4948a9ef9db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2016 15:02:05 -0800 Subject: dma-mapping: always provide the dma_map_ops based implementation Move the generic implementation to now that all architectures support it and remove the HAVE_DMA_ATTR Kconfig symbol now that everyone supports them. [valentinrothberg@gmail.com: remove leftovers in Kconfig] Signed-off-by: Christoph Hellwig Cc: "David S. Miller" Cc: Aurelien Jacquiot Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Helge Deller Cc: James Hogan Cc: Jesper Nilsson Cc: Koichi Yasutake Cc: Ley Foon Tan Cc: Mark Salter Cc: Mikael Starvik Cc: Steven Miao Cc: Vineet Gupta Cc: Christian Borntraeger Cc: Joerg Roedel Cc: Sebastian Ott Signed-off-by: Valentin Rothberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/dma-mapping-broken.h | 95 -------- include/asm-generic/dma-mapping-common.h | 358 ----------------------------- include/linux/dma-attrs.h | 10 - include/linux/dma-mapping.h | 379 +++++++++++++++++++++++++++++-- 4 files changed, 361 insertions(+), 481 deletions(-) delete mode 100644 include/asm-generic/dma-mapping-broken.h delete mode 100644 include/asm-generic/dma-mapping-common.h (limited to 'include') diff --git a/include/asm-generic/dma-mapping-broken.h b/include/asm-generic/dma-mapping-broken.h deleted file mode 100644 index 6c32af9..0000000 --- a/include/asm-generic/dma-mapping-broken.h +++ /dev/null @@ -1,95 +0,0 @@ -#ifndef _ASM_GENERIC_DMA_MAPPING_H -#define _ASM_GENERIC_DMA_MAPPING_H - -/* define the dma api to allow compilation but not linking of - * dma dependent code. Code that depends on the dma-mapping - * API needs to set 'depends on HAS_DMA' in its Kconfig - */ - -struct scatterlist; - -extern void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flag); - -extern void -dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle); - -static inline void *dma_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) -{ - /* attrs is not supported and ignored */ - return dma_alloc_coherent(dev, size, dma_handle, flag); -} - -static inline void dma_free_attrs(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - struct dma_attrs *attrs) -{ - /* attrs is not supported and ignored */ - dma_free_coherent(dev, size, cpu_addr, dma_handle); -} - -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -extern dma_addr_t -dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction); - -extern void -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction); - -extern int -dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction); - -extern void -dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction); - -extern dma_addr_t -dma_map_page(struct device *dev, struct page *page, unsigned long offset, - size_t size, enum dma_data_direction direction); - -extern void -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction); - -extern void -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - -extern void -dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction); - -extern void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction); - -#define dma_sync_single_for_device dma_sync_single_for_cpu -#define dma_sync_single_range_for_device dma_sync_single_range_for_cpu -#define dma_sync_sg_for_device dma_sync_sg_for_cpu - -extern int -dma_mapping_error(struct device *dev, dma_addr_t dma_addr); - -extern int -dma_supported(struct device *dev, u64 mask); - -extern int -dma_set_mask(struct device *dev, u64 mask); - -extern int -dma_get_cache_alignment(void); - -extern void -dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction); - -#endif /* _ASM_GENERIC_DMA_MAPPING_H */ diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h deleted file mode 100644 index b1bc954..0000000 --- a/include/asm-generic/dma-mapping-common.h +++ /dev/null @@ -1,358 +0,0 @@ -#ifndef _ASM_GENERIC_DMA_MAPPING_H -#define _ASM_GENERIC_DMA_MAPPING_H - -#include -#include -#include -#include -#include -#include - -static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, - size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; - - kmemcheck_mark_initialized(ptr, size); - BUG_ON(!valid_dma_direction(dir)); - addr = ops->map_page(dev, virt_to_page(ptr), - (unsigned long)ptr & ~PAGE_MASK, size, - dir, attrs); - debug_dma_map_page(dev, virt_to_page(ptr), - (unsigned long)ptr & ~PAGE_MASK, size, - dir, addr, true); - return addr; -} - -static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, - size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, attrs); - debug_dma_unmap_page(dev, addr, size, dir, true); -} - -/* - * dma_maps_sg_attrs returns 0 on error and > 0 on success. - * It should never return a value < 0. - */ -static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - int i, ents; - struct scatterlist *s; - - for_each_sg(sg, s, nents, i) - kmemcheck_mark_initialized(sg_virt(s), s->length); - BUG_ON(!valid_dma_direction(dir)); - ents = ops->map_sg(dev, sg, nents, dir, attrs); - BUG_ON(ents < 0); - debug_dma_map_sg(dev, sg, nents, ents, dir); - - return ents; -} - -static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - debug_dma_unmap_sg(dev, sg, nents, dir); - if (ops->unmap_sg) - ops->unmap_sg(dev, sg, nents, dir, attrs); -} - -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, - size_t offset, size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - dma_addr_t addr; - - kmemcheck_mark_initialized(page_address(page) + offset, size); - BUG_ON(!valid_dma_direction(dir)); - addr = ops->map_page(dev, page, offset, size, dir, NULL); - debug_dma_map_page(dev, page, offset, size, dir, addr, false); - - return addr; -} - -static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->unmap_page) - ops->unmap_page(dev, addr, size, dir, NULL); - debug_dma_unmap_page(dev, addr, size, dir, false); -} - -static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(dev, addr, size, dir); - debug_dma_sync_single_for_cpu(dev, addr, size, dir); -} - -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_device) - ops->sync_single_for_device(dev, addr, size, dir); - debug_dma_sync_single_for_device(dev, addr, size, dir); -} - -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t addr, - unsigned long offset, - size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_cpu) - ops->sync_single_for_cpu(dev, addr + offset, size, dir); - debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); -} - -static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t addr, - unsigned long offset, - size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_single_for_device) - ops->sync_single_for_device(dev, addr + offset, size, dir); - debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); -} - -static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_cpu) - ops->sync_sg_for_cpu(dev, sg, nelems, dir); - debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); -} - -static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - if (ops->sync_sg_for_device) - ops->sync_sg_for_device(dev, sg, nelems, dir); - debug_dma_sync_sg_for_device(dev, sg, nelems, dir); - -} - -#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) -#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL) -#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL) -#define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL) - -extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size); - -void *dma_common_contiguous_remap(struct page *page, size_t size, - unsigned long vm_flags, - pgprot_t prot, const void *caller); - -void *dma_common_pages_remap(struct page **pages, size_t size, - unsigned long vm_flags, pgprot_t prot, - const void *caller); -void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); - -/** - * dma_mmap_attrs - map a coherent DMA allocation into user space - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @vma: vm_area_struct describing requested user mapping - * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs - * @handle: device-view address returned from dma_alloc_attrs - * @size: size of memory originally requested in dma_alloc_attrs - * @attrs: attributes of mapping properties requested in dma_alloc_attrs - * - * Map a coherent DMA buffer previously allocated by dma_alloc_attrs - * into user space. The coherent DMA buffer must not be freed by the - * driver until the user space mapping has been released. - */ -static inline int -dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, - dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->mmap) - return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); - return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); -} - -#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) - -int -dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size); - -static inline int -dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, - dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - BUG_ON(!ops); - if (ops->get_sgtable) - return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, - attrs); - return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size); -} - -#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, NULL) - -#ifndef arch_dma_alloc_attrs -#define arch_dma_alloc_attrs(dev, flag) (true) -#endif - -static inline void *dma_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - void *cpu_addr; - - BUG_ON(!ops); - - if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) - return cpu_addr; - - if (!arch_dma_alloc_attrs(&dev, &flag)) - return NULL; - if (!ops->alloc) - return NULL; - - cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); - debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); - return cpu_addr; -} - -static inline void dma_free_attrs(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!ops); - WARN_ON(irqs_disabled()); - - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) - return; - - if (!ops->free) - return; - - debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); - ops->free(dev, size, cpu_addr, dma_handle, attrs); -} - -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - return dma_alloc_attrs(dev, size, dma_handle, flag, NULL); -} - -static inline void dma_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - return dma_free_attrs(dev, size, cpu_addr, dma_handle, NULL); -} - -static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); - return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs); -} - -static inline void dma_free_noncoherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - DEFINE_DMA_ATTRS(attrs); - - dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); - dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); -} - -static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - debug_dma_mapping_error(dev, dma_addr); - - if (get_dma_ops(dev)->mapping_error) - return get_dma_ops(dev)->mapping_error(dev, dma_addr); - -#ifdef DMA_ERROR_CODE - return dma_addr == DMA_ERROR_CODE; -#else - return 0; -#endif -} - -#ifndef HAVE_ARCH_DMA_SUPPORTED -static inline int dma_supported(struct device *dev, u64 mask) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - if (!ops) - return 0; - if (!ops->dma_supported) - return 1; - return ops->dma_supported(dev, mask); -} -#endif - -#ifndef HAVE_ARCH_DMA_SET_MASK -static inline int dma_set_mask(struct device *dev, u64 mask) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - if (ops->set_dma_mask) - return ops->set_dma_mask(dev, mask); - - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - *dev->dma_mask = mask; - return 0; -} -#endif - -#endif diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h index c8e1831..99c0be0 100644 --- a/include/linux/dma-attrs.h +++ b/include/linux/dma-attrs.h @@ -41,7 +41,6 @@ static inline void init_dma_attrs(struct dma_attrs *attrs) bitmap_zero(attrs->flags, __DMA_ATTRS_LONGS); } -#ifdef CONFIG_HAVE_DMA_ATTRS /** * dma_set_attr - set a specific attribute * @attr: attribute to set @@ -67,14 +66,5 @@ static inline int dma_get_attr(enum dma_attr attr, struct dma_attrs *attrs) BUG_ON(attr >= DMA_ATTR_MAX); return test_bit(attr, attrs->flags); } -#else /* !CONFIG_HAVE_DMA_ATTRS */ -static inline void dma_set_attr(enum dma_attr attr, struct dma_attrs *attrs) -{ -} -static inline int dma_get_attr(enum dma_attr attr, struct dma_attrs *attrs) -{ - return 0; -} -#endif /* CONFIG_HAVE_DMA_ATTRS */ #endif /* _DMA_ATTR_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 2e551e2..cc0517b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -6,8 +6,12 @@ #include #include #include +#include #include #include +#include +#include +#include /* * A dma_addr_t can hold any valid DMA or bus address for the platform. @@ -86,7 +90,363 @@ static inline int is_device_dma_capable(struct device *dev) #ifdef CONFIG_HAS_DMA #include #else -#include +/* + * Define the dma api to allow compilation but not linking of + * dma dependent code. Code that depends on the dma-mapping + * API needs to set 'depends on HAS_DMA' in its Kconfig + */ +extern struct dma_map_ops bad_dma_ops; +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return &bad_dma_ops; +} +#endif + +static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, + size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + dma_addr_t addr; + + kmemcheck_mark_initialized(ptr, size); + BUG_ON(!valid_dma_direction(dir)); + addr = ops->map_page(dev, virt_to_page(ptr), + (unsigned long)ptr & ~PAGE_MASK, size, + dir, attrs); + debug_dma_map_page(dev, virt_to_page(ptr), + (unsigned long)ptr & ~PAGE_MASK, size, + dir, addr, true); + return addr; +} + +static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, + size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->unmap_page) + ops->unmap_page(dev, addr, size, dir, attrs); + debug_dma_unmap_page(dev, addr, size, dir, true); +} + +/* + * dma_maps_sg_attrs returns 0 on error and > 0 on success. + * It should never return a value < 0. + */ +static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + int i, ents; + struct scatterlist *s; + + for_each_sg(sg, s, nents, i) + kmemcheck_mark_initialized(sg_virt(s), s->length); + BUG_ON(!valid_dma_direction(dir)); + ents = ops->map_sg(dev, sg, nents, dir, attrs); + BUG_ON(ents < 0); + debug_dma_map_sg(dev, sg, nents, ents, dir); + + return ents; +} + +static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + debug_dma_unmap_sg(dev, sg, nents, dir); + if (ops->unmap_sg) + ops->unmap_sg(dev, sg, nents, dir, attrs); +} + +static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + dma_addr_t addr; + + kmemcheck_mark_initialized(page_address(page) + offset, size); + BUG_ON(!valid_dma_direction(dir)); + addr = ops->map_page(dev, page, offset, size, dir, NULL); + debug_dma_map_page(dev, page, offset, size, dir, addr, false); + + return addr; +} + +static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->unmap_page) + ops->unmap_page(dev, addr, size, dir, NULL); + debug_dma_unmap_page(dev, addr, size, dir, false); +} + +static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, + enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(dev, addr, size, dir); + debug_dma_sync_single_for_cpu(dev, addr, size, dir); +} + +static inline void dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_device) + ops->sync_single_for_device(dev, addr, size, dir); + debug_dma_sync_single_for_device(dev, addr, size, dir); +} + +static inline void dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t addr, + unsigned long offset, + size_t size, + enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(dev, addr + offset, size, dir); + debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); +} + +static inline void dma_sync_single_range_for_device(struct device *dev, + dma_addr_t addr, + unsigned long offset, + size_t size, + enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_device) + ops->sync_single_for_device(dev, addr + offset, size, dir); + debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); +} + +static inline void +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_sg_for_cpu) + ops->sync_sg_for_cpu(dev, sg, nelems, dir); + debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); +} + +static inline void +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_sg_for_device) + ops->sync_sg_for_device(dev, sg, nelems, dir); + debug_dma_sync_sg_for_device(dev, sg, nelems, dir); + +} + +#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) +#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL) +#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL) +#define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL) + +extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size); + +void *dma_common_contiguous_remap(struct page *page, size_t size, + unsigned long vm_flags, + pgprot_t prot, const void *caller); + +void *dma_common_pages_remap(struct page **pages, size_t size, + unsigned long vm_flags, pgprot_t prot, + const void *caller); +void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags); + +/** + * dma_mmap_attrs - map a coherent DMA allocation into user space + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @vma: vm_area_struct describing requested user mapping + * @cpu_addr: kernel CPU-view address returned from dma_alloc_attrs + * @handle: device-view address returned from dma_alloc_attrs + * @size: size of memory originally requested in dma_alloc_attrs + * @attrs: attributes of mapping properties requested in dma_alloc_attrs + * + * Map a coherent DMA buffer previously allocated by dma_alloc_attrs + * into user space. The coherent DMA buffer must not be freed by the + * driver until the user space mapping has been released. + */ +static inline int +dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, + dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + if (ops->mmap) + return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + +#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) + +int +dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size); + +static inline int +dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, + dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + if (ops->get_sgtable) + return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, + attrs); + return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size); +} + +#define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, NULL) + +#ifndef arch_dma_alloc_attrs +#define arch_dma_alloc_attrs(dev, flag) (true) +#endif + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *cpu_addr; + + BUG_ON(!ops); + + if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)) + return cpu_addr; + + if (!arch_dma_alloc_attrs(&dev, &flag)) + return NULL; + if (!ops->alloc) + return NULL; + + cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + return cpu_addr; +} + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!ops); + WARN_ON(irqs_disabled()); + + if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + return; + + if (!ops->free) + return; + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + ops->free(dev, size, cpu_addr, dma_handle, attrs); +} + +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + return dma_alloc_attrs(dev, size, dma_handle, flag, NULL); +} + +static inline void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + return dma_free_attrs(dev, size, cpu_addr, dma_handle, NULL); +} + +static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); + return dma_alloc_attrs(dev, size, dma_handle, gfp, &attrs); +} + +static inline void dma_free_noncoherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_NON_CONSISTENT, &attrs); + dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); +} + +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + debug_dma_mapping_error(dev, dma_addr); + + if (get_dma_ops(dev)->mapping_error) + return get_dma_ops(dev)->mapping_error(dev, dma_addr); + +#ifdef DMA_ERROR_CODE + return dma_addr == DMA_ERROR_CODE; +#else + return 0; +#endif +} + +#ifndef HAVE_ARCH_DMA_SUPPORTED +static inline int dma_supported(struct device *dev, u64 mask) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + if (!ops) + return 0; + if (!ops->dma_supported) + return 1; + return ops->dma_supported(dev, mask); +} +#endif + +#ifndef HAVE_ARCH_DMA_SET_MASK +static inline int dma_set_mask(struct device *dev, u64 mask) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->set_dma_mask) + return ops->set_dma_mask(dev, mask); + + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + *dev->dma_mask = mask; + return 0; +} #endif static inline u64 dma_get_mask(struct device *dev) @@ -259,22 +619,6 @@ static inline void dmam_release_declared_memory(struct device *dev) } #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ -#ifndef CONFIG_HAVE_DMA_ATTRS -struct dma_attrs; - -#define dma_map_single_attrs(dev, cpu_addr, size, dir, attrs) \ - dma_map_single(dev, cpu_addr, size, dir) - -#define dma_unmap_single_attrs(dev, dma_addr, size, dir, attrs) \ - dma_unmap_single(dev, dma_addr, size, dir) - -#define dma_map_sg_attrs(dev, sgl, nents, dir, attrs) \ - dma_map_sg(dev, sgl, nents, dir) - -#define dma_unmap_sg_attrs(dev, sgl, nents, dir, attrs) \ - dma_unmap_sg(dev, sgl, nents, dir) - -#else static inline void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t gfp) { @@ -300,7 +644,6 @@ static inline int dma_mmap_writecombine(struct device *dev, dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); } -#endif /* CONFIG_HAVE_DMA_ATTRS */ #ifdef CONFIG_NEED_DMA_MAP_STATE #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME -- cgit v1.1 From 20d666e41166f8023ff3d960e832d87ded18c5c4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2016 15:02:09 -0800 Subject: dma-mapping: remove This wasn't an asm-generic header to start with, and can be merged into dma-mapping.h trivially. Signed-off-by: Christoph Hellwig Cc: "David S. Miller" Cc: Aurelien Jacquiot Cc: Chris Metcalf Cc: David Howells Cc: Geert Uytterhoeven Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Helge Deller Cc: James Hogan Cc: Jesper Nilsson Cc: Koichi Yasutake Cc: Ley Foon Tan Cc: Mark Salter Cc: Mikael Starvik Cc: Steven Miao Cc: Vineet Gupta Cc: Christian Borntraeger Cc: Joerg Roedel Cc: Sebastian Ott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/dma-coherent.h | 32 -------------------------------- include/linux/dma-mapping.h | 34 ++++++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 38 deletions(-) delete mode 100644 include/asm-generic/dma-coherent.h (limited to 'include') diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h deleted file mode 100644 index 0297e58..0000000 --- a/include/asm-generic/dma-coherent.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef DMA_COHERENT_H -#define DMA_COHERENT_H - -#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT -/* - * These three functions are only for dma allocator. - * Don't use them in device drivers. - */ -int dma_alloc_from_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret); -int dma_release_from_coherent(struct device *dev, int order, void *vaddr); - -int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, size_t size, int *ret); -/* - * Standard interface - */ -#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY -int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size, int flags); - -void dma_release_declared_memory(struct device *dev); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size); -#else -#define dma_alloc_from_coherent(dev, size, handle, ret) (0) -#define dma_release_from_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_coherent(dev, vma, vaddr, order, ret) (0) -#endif - -#endif diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index cc0517b..d6b575b 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -11,7 +11,6 @@ #include #include #include -#include /* * A dma_addr_t can hold any valid DMA or bus address for the platform. @@ -87,6 +86,23 @@ static inline int is_device_dma_capable(struct device *dev) return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; } +#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT +/* + * These three functions are only for dma allocator. + * Don't use them in device drivers. + */ +int dma_alloc_from_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret); +int dma_release_from_coherent(struct device *dev, int order, void *vaddr); + +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); +#else +#define dma_alloc_from_coherent(dev, size, handle, ret) (0) +#define dma_release_from_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_coherent(dev, vma, vaddr, order, ret) (0) +#endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ + #ifdef CONFIG_HAS_DMA #include #else @@ -568,7 +584,13 @@ static inline int dma_get_cache_alignment(void) #define DMA_MEMORY_INCLUDES_CHILDREN 0x04 #define DMA_MEMORY_EXCLUSIVE 0x08 -#ifndef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY +#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT +int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size, int flags); +void dma_release_declared_memory(struct device *dev); +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size); +#else static inline int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags) @@ -587,7 +609,7 @@ dma_mark_declared_memory_occupied(struct device *dev, { return ERR_PTR(-EBUSY); } -#endif +#endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ /* * Managed DMA API @@ -600,13 +622,13 @@ extern void *dmam_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); extern void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); -#ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY +#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT extern int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags); extern void dmam_release_declared_memory(struct device *dev); -#else /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ +#else /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ static inline int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, gfp_t gfp) @@ -617,7 +639,7 @@ static inline int dmam_declare_coherent_memory(struct device *dev, static inline void dmam_release_declared_memory(struct device *dev) { } -#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ +#endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ static inline void *dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t gfp) -- cgit v1.1 From 8e99469ab0f821bea77625cd4775ca529d4ca7d4 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 20 Jan 2016 15:02:12 -0800 Subject: dma-mapping: use offset_in_page macro Use offset_in_page macro instead of (addr & ~PAGE_MASK). Signed-off-by: Geliang Tang Acked-by: Will Deacon Cc: Christian Borntraeger Cc: Joerg Roedel Cc: Sebastian Ott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index d6b575b..75857cd 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -129,10 +129,10 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, kmemcheck_mark_initialized(ptr, size); BUG_ON(!valid_dma_direction(dir)); addr = ops->map_page(dev, virt_to_page(ptr), - (unsigned long)ptr & ~PAGE_MASK, size, + offset_in_page(ptr), size, dir, attrs); debug_dma_map_page(dev, virt_to_page(ptr), - (unsigned long)ptr & ~PAGE_MASK, size, + offset_in_page(ptr), size, dir, addr, true); return addr; } -- cgit v1.1 From 6d378dac7c4905db38f8127c4e618f0f627a4ced Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:02:18 -0800 Subject: mm: memcontrol: drop unused @css argument in memcg_init_kmem This series adds accounting of the historical "kmem" memory consumers to the cgroup2 memory controller. These consumers include the dentry cache, the inode cache, kernel stack pages, and a few others that are pointed out in patch 7/8. The footprint of these consumers is directly tied to userspace activity in common workloads, and so they have to be part of the minimally viable configuration in order to present a complete feature to our users. The cgroup2 interface of the memory controller is far from complete, but this series, along with the socket memory accounting series, provides the final semantic changes for the existing memory knobs in the cgroup2 interface, which is scheduled for initial release in the next merge window. This patch (of 8): Remove unused css argument frmo memcg_init_kmem() Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/tcp_memcontrol.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h index 01ff7c6..020c2de 100644 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h @@ -4,6 +4,7 @@ struct cgroup_subsys; struct mem_cgroup; -int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss); +int tcp_init_cgroup(struct mem_cgroup *memcg); void tcp_destroy_cgroup(struct mem_cgroup *memcg); + #endif /* _TCP_MEMCG_H */ -- cgit v1.1 From 567e9ab2e614e55feca20e8bcb54b629e9cc1a3b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:02:24 -0800 Subject: mm: memcontrol: give the kmem states more descriptive names On any given memcg, the kmem accounting feature has three separate states: not initialized, structures allocated, and actively accounting slab memory. These are represented through a combination of the kmem_acct_activated and kmem_acct_active flags, which is confusing. Convert to a kmem_state enum with the states NONE, ALLOCATED, and ONLINE. Then rename the functions to modify the state accordingly. This follows the nomenclature of css object states more closely. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Tejun Heo Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 189f04d..54dab4d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -152,6 +152,12 @@ struct mem_cgroup_thresholds { struct mem_cgroup_threshold_ary *spare; }; +enum memcg_kmem_state { + KMEM_NONE, + KMEM_ALLOCATED, + KMEM_ONLINE, +}; + /* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide @@ -233,8 +239,7 @@ struct mem_cgroup { #if defined(CONFIG_MEMCG_KMEM) /* Index in the kmem_cache->memcg_params.memcg_caches array */ int kmemcg_id; - bool kmem_acct_activated; - bool kmem_acct_active; + enum memcg_kmem_state kmem_state; #endif int last_scanned_node; @@ -750,9 +755,9 @@ static inline bool memcg_kmem_enabled(void) return static_branch_unlikely(&memcg_kmem_enabled_key); } -static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) +static inline bool memcg_kmem_online(struct mem_cgroup *memcg) { - return memcg->kmem_acct_active; + return memcg->kmem_state == KMEM_ONLINE; } /* @@ -850,7 +855,7 @@ static inline bool memcg_kmem_enabled(void) return false; } -static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) +static inline bool memcg_kmem_online(struct mem_cgroup *memcg) { return false; } -- cgit v1.1 From 127424c86bb6cb87f0b563d9fdcfbbaf3c86ecec Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:02:32 -0800 Subject: mm: memcontrol: move kmem accounting code to CONFIG_MEMCG The cgroup2 memory controller will account important in-kernel memory consumers per default. Move all necessary components to CONFIG_MEMCG. Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list_lru.h | 4 ++-- include/linux/memcontrol.h | 7 ++++--- include/linux/sched.h | 4 ++-- include/linux/slab.h | 2 +- include/linux/slab_def.h | 3 ++- include/linux/slub_def.h | 2 +- 6 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 2a6b994..cb0ba9f 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -40,7 +40,7 @@ struct list_lru_node { spinlock_t lock; /* global list, used for the root cgroup in cgroup aware lrus */ struct list_lru_one lru; -#ifdef CONFIG_MEMCG_KMEM +#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) /* for cgroup aware lrus points to per cgroup lists, otherwise NULL */ struct list_lru_memcg *memcg_lrus; #endif @@ -48,7 +48,7 @@ struct list_lru_node { struct list_lru { struct list_lru_node *node; -#ifdef CONFIG_MEMCG_KMEM +#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) struct list_head list; #endif }; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 54dab4d..a87704e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -236,7 +236,7 @@ struct mem_cgroup { #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; #endif -#if defined(CONFIG_MEMCG_KMEM) +#ifndef CONFIG_SLOB /* Index in the kmem_cache->memcg_params.memcg_caches array */ int kmemcg_id; enum memcg_kmem_state kmem_state; @@ -735,7 +735,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) } #endif -#ifdef CONFIG_MEMCG_KMEM +#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) extern struct static_key_false memcg_kmem_enabled_key; extern int memcg_nr_cache_ids; @@ -891,5 +891,6 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) static inline void memcg_kmem_put_cache(struct kmem_cache *cachep) { } -#endif /* CONFIG_MEMCG_KMEM */ +#endif /* CONFIG_MEMCG && !CONFIG_SLOB */ + #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 02dabf2..f1e81e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1476,10 +1476,10 @@ struct task_struct { unsigned in_iowait:1; #ifdef CONFIG_MEMCG unsigned memcg_may_oom:1; -#endif -#ifdef CONFIG_MEMCG_KMEM +#ifndef CONFIG_SLOB unsigned memcg_kmem_skip_account:1; #endif +#endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index 3ffee74..3627d5c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -86,7 +86,7 @@ #else # define SLAB_FAILSLAB 0x00000000UL #endif -#ifdef CONFIG_MEMCG_KMEM +#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) # define SLAB_ACCOUNT 0x04000000UL /* Account to memcg */ #else # define SLAB_ACCOUNT 0x00000000UL diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 33d0490..cf139d3 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -69,7 +69,8 @@ struct kmem_cache { */ int obj_offset; #endif /* CONFIG_DEBUG_SLAB */ -#ifdef CONFIG_MEMCG_KMEM + +#ifdef CONFIG_MEMCG struct memcg_cache_params memcg_params; #endif diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 3388511..b7e57927 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -84,7 +84,7 @@ struct kmem_cache { #ifdef CONFIG_SYSFS struct kobject kobj; /* For sysfs */ #endif -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG struct memcg_cache_params memcg_params; int max_attr_size; /* for propagation, maximum size of a stored attr */ #ifdef CONFIG_SYSFS -- cgit v1.1 From 489c2a20a414351fe0813a727c34600c0f7292ae Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:02:41 -0800 Subject: mm: memcontrol: introduce CONFIG_MEMCG_LEGACY_KMEM Let the user know that CONFIG_MEMCG_KMEM does not apply to the cgroup2 interface. This also makes legacy-only code sections stand out better. [arnd@arndb.de: mm: memcontrol: only manage socket pressure for CONFIG_INET] Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Acked-by: Vladimir Davydov Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a87704e..2bb14d02 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -233,7 +233,7 @@ struct mem_cgroup { */ struct mem_cgroup_stat_cpu __percpu *stat; -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) +#if defined(CONFIG_MEMCG_LEGACY_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; #endif #ifndef CONFIG_SLOB @@ -717,7 +717,7 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG_LEGACY_KMEM if (memcg->tcp_mem.memory_pressure) return true; #endif -- cgit v1.1 From d55f90bfab40e3b5db323711d28186ff09461692 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 20 Jan 2016 15:02:44 -0800 Subject: net: drop tcp_memcontrol.c tcp_memcontrol.c only contains legacy memory.tcp.kmem.* file definitions and mem_cgroup->tcp_mem init/destroy stuff. This doesn't belong to network subsys. Let's move it to memcontrol.c. This also allows us to reuse generic code for handling legacy memcg files. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: "David S. Miller" Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/tcp_memcontrol.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/net/tcp_memcontrol.h (limited to 'include') diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h deleted file mode 100644 index 020c2de..0000000 --- a/include/net/tcp_memcontrol.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _TCP_MEMCG_H -#define _TCP_MEMCG_H - -struct cgroup_subsys; -struct mem_cgroup; - -int tcp_init_cgroup(struct mem_cgroup *memcg); -void tcp_destroy_cgroup(struct mem_cgroup *memcg); - -#endif /* _TCP_MEMCG_H */ -- cgit v1.1 From d886f4e483ce63a3304adc9eda87031b93341c28 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:02:47 -0800 Subject: mm: memcontrol: rein in the CONFIG space madness What CONFIG_INET and CONFIG_LEGACY_KMEM guard inside the memory controller code is insignificant, having these conditionals is not worth the complication and fragility that comes with them. [akpm@linux-foundation.org: rework mem_cgroup_css_free() statement ordering] Signed-off-by: Johannes Weiner Cc: Michal Hocko Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2bb14d02..47995b4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -233,9 +233,11 @@ struct mem_cgroup { */ struct mem_cgroup_stat_cpu __percpu *stat; -#if defined(CONFIG_MEMCG_LEGACY_KMEM) && defined(CONFIG_INET) + unsigned long socket_pressure; + + /* Legacy tcp memory accounting */ struct cg_proto tcp_mem; -#endif + #ifndef CONFIG_SLOB /* Index in the kmem_cache->memcg_params.memcg_caches array */ int kmemcg_id; @@ -254,10 +256,6 @@ struct mem_cgroup { struct wb_domain cgwb_domain; #endif -#ifdef CONFIG_INET - unsigned long socket_pressure; -#endif - /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; @@ -712,15 +710,13 @@ void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages); -#if defined(CONFIG_MEMCG) && defined(CONFIG_INET) +#ifdef CONFIG_MEMCG extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { -#ifdef CONFIG_MEMCG_LEGACY_KMEM if (memcg->tcp_mem.memory_pressure) return true; -#endif do { if (time_before(jiffies, memcg->socket_pressure)) return true; -- cgit v1.1 From 0db1529817b7b16226421f01470c5ba982c5f302 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:02:50 -0800 Subject: mm: memcontrol: flatten struct cg_proto There are no more external users of struct cg_proto, flatten the structure into struct mem_cgroup. Since using those struct members doesn't stand out as much anymore, add cgroup2 static branches to make it clearer which code is legacy. Suggested-by: Vladimir Davydov Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 47995b4..a3869bf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -85,12 +85,6 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -struct cg_proto { - struct page_counter memory_allocated; /* Current allocated memory. */ - int memory_pressure; - bool active; -}; - #ifdef CONFIG_MEMCG struct mem_cgroup_stat_cpu { long count[MEM_CGROUP_STAT_NSTATS]; @@ -169,8 +163,11 @@ struct mem_cgroup { /* Accounted resources */ struct page_counter memory; + + /* Legacy consumer-oriented counters */ struct page_counter memsw; struct page_counter kmem; + struct page_counter tcpmem; /* Normal memory consumption range */ unsigned long low; @@ -236,7 +233,8 @@ struct mem_cgroup { unsigned long socket_pressure; /* Legacy tcp memory accounting */ - struct cg_proto tcp_mem; + bool tcpmem_active; + int tcpmem_pressure; #ifndef CONFIG_SLOB /* Index in the kmem_cache->memcg_params.memcg_caches array */ @@ -715,7 +713,7 @@ extern struct static_key_false memcg_sockets_enabled_key; #define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key) static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { - if (memcg->tcp_mem.memory_pressure) + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure) return true; do { if (time_before(jiffies, memcg->socket_pressure)) -- cgit v1.1 From 0b8f73e104285a4badf9d768d1c39b06d77d1f97 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:02:53 -0800 Subject: mm: memcontrol: clean up alloc, online, offline, free functions The creation and teardown of struct mem_cgroup is fairly messy and that has attracted mistakes and subtle bugs before. The main cause for this is that there is no clear model about what needs to happen when, and that attracts more chaos. So create one: 1. mem_cgroup_alloc() should allocate struct mem_cgroup and its auxiliary members and initialize work items, locks etc. so that the object it returns is fully initialized and in a neutral state. 2. mem_cgroup_css_alloc() will use mem_cgroup_alloc() to obtain a new memcg object and configure it and the system according to the role of the new memory-controlled cgroup in the hierarchy. 3. mem_cgroup_css_online() is no longer needed to synchronize with iterators, but it verifies css->id which isn't available earlier. 4. mem_cgroup_css_offline() implements stuff that needs to happen upon the user-visible destruction of a cgroup, which includes stopping all user interfacing as well as releasing certain structures when continued memory consumption would be unexpected at that point. 5. mem_cgroup_css_free() prepares the system and the memcg object for the object's disappearance, neutralizes its state, and then gives it back to mem_cgroup_free(). 6. mem_cgroup_free() releases struct mem_cgroup and auxiliary memory. [arnd@arndb.de: fix SLOB build regression] Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a3869bf..27123e5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -181,9 +181,6 @@ struct mem_cgroup { /* vmpressure notifications */ struct vmpressure vmpressure; - /* css_online() has been completed */ - int initialized; - /* * Should the accounting and control be hierarchical, per subtree? */ -- cgit v1.1 From 37e84351198be087335ad2b2253b35c7cc76a5ad Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 20 Jan 2016 15:02:56 -0800 Subject: mm: memcontrol: charge swap to cgroup2 This patchset introduces swap accounting to cgroup2. This patch (of 7): In the legacy hierarchy we charge memsw, which is dubious, because: - memsw.limit must be >= memory.limit, so it is impossible to limit swap usage less than memory usage. Taking into account the fact that the primary limiting mechanism in the unified hierarchy is memory.high while memory.limit is either left unset or set to a very large value, moving memsw.limit knob to the unified hierarchy would effectively make it impossible to limit swap usage according to the user preference. - memsw.usage != memory.usage + swap.usage, because a page occupying both swap entry and a swap cache page is charged only once to memsw counter. As a result, it is possible to effectively eat up to memory.limit of memory pages *and* memsw.limit of swap entries, which looks unexpected. That said, we should provide a different swap limiting mechanism for cgroup2. This patch adds mem_cgroup->swap counter, which charges the actual number of swap entries used by a cgroup. It is only charged in the unified hierarchy, while the legacy hierarchy memsw logic is left intact. The swap usage can be monitored using new memory.swap.current file and limited using memory.swap.max. Note, to charge swap resource properly in the unified hierarchy, we have to make swap_entry_free uncharge swap only when ->usage reaches zero, not just ->count, i.e. when all references to a swap entry, including the one taken by swap cache, are gone. This is necessary, because otherwise swap-in could result in uncharging swap even if the page is still in swap cache and hence still occupies a swap entry. At the same time, this shouldn't break memsw counter logic, where a page is never charged twice for using both memory and swap, because in case of legacy hierarchy we uncharge swap on commit (see mem_cgroup_commit_charge). Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + include/linux/swap.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 27123e5..6e01262 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -163,6 +163,7 @@ struct mem_cgroup { /* Accounted resources */ struct page_counter memory; + struct page_counter swap; /* Legacy consumer-oriented counters */ struct page_counter memsw; diff --git a/include/linux/swap.h b/include/linux/swap.h index 414e101..83b95f3 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -368,11 +368,17 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) #endif #ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); +extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); extern void mem_cgroup_uncharge_swap(swp_entry_t entry); #else static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) { } +static inline int mem_cgroup_try_charge_swap(struct page *page, + swp_entry_t entry) +{ + return 0; +} static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) { } -- cgit v1.1 From eb01aaab43084f1c919ce66183fea005033351b9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 20 Jan 2016 15:03:02 -0800 Subject: mm: memcontrol: replace mem_cgroup_lruvec_online with mem_cgroup_online mem_cgroup_lruvec_online() takes lruvec, but it only needs memcg. Since get_scan_count(), which is the only user of this function, now possesses pointer to memcg, let's pass memcg directly to mem_cgroup_online() instead of picking it out of lruvec and rename the function accordingly. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6e01262..1666617 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -355,6 +355,13 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } +static inline bool mem_cgroup_online(struct mem_cgroup *memcg) +{ + if (mem_cgroup_disabled()) + return true; + return !!(memcg->css.flags & CSS_ONLINE); +} + /* * For memory reclaim. */ @@ -363,20 +370,6 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, int nr_pages); -static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) -{ - struct mem_cgroup_per_zone *mz; - struct mem_cgroup *memcg; - - if (mem_cgroup_disabled()) - return true; - - mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); - memcg = mz->memcg; - - return !!(memcg->css.flags & CSS_ONLINE); -} - static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { @@ -589,13 +582,13 @@ static inline bool mem_cgroup_disabled(void) return true; } -static inline bool -mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) +static inline bool mem_cgroup_online(struct mem_cgroup *memcg) { return true; } -static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) +static inline bool +mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { return true; } -- cgit v1.1 From 6f2cb2f17700a39567cf3e9a2e95041def5f3688 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 20 Jan 2016 15:03:05 -0800 Subject: swap.h: move memcg related stuff to the end of the file The following patches will add more functions to the memcg section of include/linux/swap.h. Some of them will need values defined below the current location of the section. So let's move the section to the end of the file. No functional changes intended. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 70 ++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 83b95f3..c2bd163 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -350,39 +350,7 @@ extern void check_move_unevictable_pages(struct page **, int nr_pages); extern int kswapd_run(int nid); extern void kswapd_stop(int nid); -#ifdef CONFIG_MEMCG -static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) -{ - /* root ? */ - if (mem_cgroup_disabled() || !memcg->css.parent) - return vm_swappiness; - - return memcg->swappiness; -} -#else -static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) -{ - return vm_swappiness; -} -#endif -#ifdef CONFIG_MEMCG_SWAP -extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); -extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); -extern void mem_cgroup_uncharge_swap(swp_entry_t entry); -#else -static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) -{ -} -static inline int mem_cgroup_try_charge_swap(struct page *page, - swp_entry_t entry) -{ - return 0; -} -static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) -{ -} -#endif #ifdef CONFIG_SWAP /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); @@ -561,5 +529,43 @@ static inline swp_entry_t get_swap_page(void) } #endif /* CONFIG_SWAP */ + +#ifdef CONFIG_MEMCG +static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) +{ + /* root ? */ + if (mem_cgroup_disabled() || !memcg->css.parent) + return vm_swappiness; + + return memcg->swappiness; +} + +#else +static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) +{ + return vm_swappiness; +} +#endif + +#ifdef CONFIG_MEMCG_SWAP +extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); +extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); +extern void mem_cgroup_uncharge_swap(swp_entry_t entry); +#else +static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +{ +} + +static inline int mem_cgroup_try_charge_swap(struct page *page, + swp_entry_t entry) +{ + return 0; +} + +static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ -- cgit v1.1 From d8b38438a0bcb362c396f49d8279ef7b505917f4 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 20 Jan 2016 15:03:07 -0800 Subject: mm: vmscan: do not scan anon pages if memcg swap limit is hit We don't scan anonymous memory if we ran out of swap, neither should we do it in case memcg swap limit is hit, because swap out is impossible anyway. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index c2bd163..a587050 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -551,6 +551,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); extern void mem_cgroup_uncharge_swap(swp_entry_t entry); +extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); #else static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) { @@ -565,6 +566,11 @@ static inline int mem_cgroup_try_charge_swap(struct page *page, static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) { } + +static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) +{ + return get_nr_swap_pages(); +} #endif #endif /* __KERNEL__*/ -- cgit v1.1 From 5ccc5abaaf6f9242cc63342c5286990233f392fa Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 20 Jan 2016 15:03:10 -0800 Subject: mm: free swap cache aggressively if memcg swap is full Swap cache pages are freed aggressively if swap is nearly full (>50% currently), because otherwise we are likely to stop scanning anonymous when we near the swap limit even if there is plenty of freeable swap cache pages. We should follow the same trend in case of memory cgroup, which has its own swap limit. Signed-off-by: Vladimir Davydov Acked-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index a587050..d18b65c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -552,6 +552,7 @@ extern void mem_cgroup_swapout(struct page *page, swp_entry_t entry); extern int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry); extern void mem_cgroup_uncharge_swap(swp_entry_t entry); extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg); +extern bool mem_cgroup_swap_full(struct page *page); #else static inline void mem_cgroup_swapout(struct page *page, swp_entry_t entry) { @@ -571,6 +572,11 @@ static inline long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) { return get_nr_swap_pages(); } + +static inline bool mem_cgroup_swap_full(struct page *page) +{ + return vm_swap_full(); +} #endif #endif /* __KERNEL__*/ -- cgit v1.1 From b2807f07f4f87362925b8a5b8cbb7b624da10f03 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 20 Jan 2016 15:03:22 -0800 Subject: mm: memcontrol: add "sock" to cgroup2 memory.stat Provide statistics on how much of a cgroup's memory footprint is made up of socket buffers from network connections owned by the group. Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 1666617..9ae48d4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -50,6 +50,9 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ MEM_CGROUP_STAT_NSTATS, + /* default hierarchy stats */ + MEMCG_SOCK, + MEMCG_NR_STAT, }; struct mem_cgroup_reclaim_cookie { @@ -87,7 +90,7 @@ enum mem_cgroup_events_target { #ifdef CONFIG_MEMCG struct mem_cgroup_stat_cpu { - long count[MEM_CGROUP_STAT_NSTATS]; + long count[MEMCG_NR_STAT]; unsigned long events[MEMCG_NR_EVENTS]; unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; -- cgit v1.1