diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-02 21:22:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-02 21:22:12 -0700 |
commit | 642e7fd23353e22290e3d51719fcb658dc252342 (patch) | |
tree | 93688d5ff15836d8e5b0e097748f7fabb13a303a /mm/migrate.c | |
parent | 21035965f60b0502fc6537b232839389bb4ce664 (diff) | |
parent | c9a211951c7c79cfb5de888d7d9550872868b086 (diff) | |
download | op-kernel-dev-642e7fd23353e22290e3d51719fcb658dc252342.zip op-kernel-dev-642e7fd23353e22290e3d51719fcb658dc252342.tar.gz |
Merge branch 'syscalls-next' of git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux
Pull removal of in-kernel calls to syscalls from Dominik Brodowski:
"System calls are interaction points between userspace and the kernel.
Therefore, system call functions such as sys_xyzzy() or
compat_sys_xyzzy() should only be called from userspace via the
syscall table, but not from elsewhere in the kernel.
At least on 64-bit x86, it will likely be a hard requirement from
v4.17 onwards to not call system call functions in the kernel: It is
better to use use a different calling convention for system calls
there, where struct pt_regs is decoded on-the-fly in a syscall wrapper
which then hands processing over to the actual syscall function. This
means that only those parameters which are actually needed for a
specific syscall are passed on during syscall entry, instead of
filling in six CPU registers with random user space content all the
time (which may cause serious trouble down the call chain). Those
x86-specific patches will be pushed through the x86 tree in the near
future.
Moreover, rules on how data may be accessed may differ between kernel
data and user data. This is another reason why calling sys_xyzzy() is
generally a bad idea, and -- at most -- acceptable in arch-specific
code.
This patchset removes all in-kernel calls to syscall functions in the
kernel with the exception of arch/. On top of this, it cleans up the
three places where many syscalls are referenced or prototyped, namely
kernel/sys_ni.c, include/linux/syscalls.h and include/linux/compat.h"
* 'syscalls-next' of git://git.kernel.org/pub/scm/linux/kernel/git/brodo/linux: (109 commits)
bpf: whitelist all syscalls for error injection
kernel/sys_ni: remove {sys_,sys_compat} from cond_syscall definitions
kernel/sys_ni: sort cond_syscall() entries
syscalls/x86: auto-create compat_sys_*() prototypes
syscalls: sort syscall prototypes in include/linux/compat.h
net: remove compat_sys_*() prototypes from net/compat.h
syscalls: sort syscall prototypes in include/linux/syscalls.h
kexec: move sys_kexec_load() prototype to syscalls.h
x86/sigreturn: use SYSCALL_DEFINE0
x86: fix sys_sigreturn() return type to be long, not unsigned long
x86/ioport: add ksys_ioperm() helper; remove in-kernel calls to sys_ioperm()
mm: add ksys_readahead() helper; remove in-kernel calls to sys_readahead()
mm: add ksys_mmap_pgoff() helper; remove in-kernel calls to sys_mmap_pgoff()
mm: add ksys_fadvise64_64() helper; remove in-kernel call to sys_fadvise64_64()
fs: add ksys_fallocate() wrapper; remove in-kernel calls to sys_fallocate()
fs: add ksys_p{read,write}64() helpers; remove in-kernel calls to syscalls
fs: add ksys_truncate() wrapper; remove in-kernel calls to sys_truncate()
fs: add ksys_sync_file_range helper(); remove in-kernel calls to syscall
kernel: add ksys_setsid() helper; remove in-kernel call to sys_setsid()
kernel: add ksys_unshare() helper; remove in-kernel calls to sys_unshare()
...
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 39 |
1 files changed, 35 insertions, 4 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index 1e5525a..0038866 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -34,6 +34,7 @@ #include <linux/backing-dev.h> #include <linux/compaction.h> #include <linux/syscalls.h> +#include <linux/compat.h> #include <linux/hugetlb.h> #include <linux/hugetlb_cgroup.h> #include <linux/gfp.h> @@ -1745,10 +1746,10 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, * Move a list of pages in the address space of the currently executing * process. */ -SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, - const void __user * __user *, pages, - const int __user *, nodes, - int __user *, status, int, flags) +static int kernel_move_pages(pid_t pid, unsigned long nr_pages, + const void __user * __user *pages, + const int __user *nodes, + int __user *status, int flags) { struct task_struct *task; struct mm_struct *mm; @@ -1807,6 +1808,36 @@ out: return err; } +SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, + const void __user * __user *, pages, + const int __user *, nodes, + int __user *, status, int, flags) +{ + return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages, + compat_uptr_t __user *, pages32, + const int __user *, nodes, + int __user *, status, + int, flags) +{ + const void __user * __user *pages; + int i; + + pages = compat_alloc_user_space(nr_pages * sizeof(void *)); + for (i = 0; i < nr_pages; i++) { + compat_uptr_t p; + + if (get_user(p, pages32 + i) || + put_user(compat_ptr(p), pages + i)) + return -EFAULT; + } + return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); +} +#endif /* CONFIG_COMPAT */ + #ifdef CONFIG_NUMA_BALANCING /* * Returns true if this is a safe migration target node for misplaced NUMA |