Merge tag 'seccomp-v4.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook: "Major additions: - sysctl and seccomp operation to discover available actions (tyhicks) - new per-filter configurable logging infrastructure and sysctl (tyhicks) - SECCOMP_RET_LOG to log allowed syscalls (tyhicks) - SECCOMP_RET_KILL_PROCESS as the new strictest possible action - self-tests for new behaviors" [ This is the seccomp part of the security pull request during the merge window that was nixed due to unrelated problems - Linus ] * tag 'seccomp-v4.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: samples: Unrename SECCOMP_RET_KILL selftests/seccomp: Test thread vs process killing seccomp: Implement SECCOMP_RET_KILL_PROCESS action seccomp: Introduce SECCOMP_RET_KILL_PROCESS seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD seccomp: Action to log before allowing seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW seccomp: Selftest for detection of filter flag support seccomp: Sysctl to configure actions that are allowed to be logged seccomp: Operation for checking if an action is available seccomp: Sysctl to display available actions seccomp: Provide matching filter for introspection selftests/seccomp: Refactor RET_ERRNO tests selftests/seccomp: Add simple seccomp overhead benchmark selftests/seccomp: Add tests for basic ptrace actions
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-09-22 16:16:41 -1000
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-09-22 16:16:41 -1000
commit: c0a3a64e723324ae6dda53214061a71de63808c3 (patch)
tree: f148333ecf690df602faaf389fb484d4561c6bc4 /tools
parent: 69c902f597c4bec92013a526268620fb6255c24a (diff)
parent: 6849243bf4c6155151b294e9f0e0dc9540d6f083 (diff)
download: op-kernel-dev-c0a3a64e723324ae6dda53214061a71de63808c3.zip
op-kernel-dev-c0a3a64e723324ae6dda53214061a71de63808c3.tar.gz
3 files changed, 626 insertions, 101 deletions
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index aeb0c80..553d870 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,8 +1,16 @@
-TEST_GEN_PROGS := seccomp_bpf
-CFLAGS += -Wl,-no-as-needed -Wall
-LDFLAGS += -lpthread
+all:
 
 include ../lib.mk
 
-$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h
-	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+	$(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
new file mode 100644
index 0000000..5838c86
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -0,0 +1,99 @@
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performace can be measured.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a)    (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+	pid_t pid, ret;
+	unsigned long long i;
+	struct timespec start, finish;
+
+	pid = getpid();
+	assert(clock_gettime(clk_id, &start) == 0);
+	for (i = 0; i < samples; i++) {
+		ret = syscall(__NR_getpid);
+		assert(pid == ret);
+	}
+	assert(clock_gettime(clk_id, &finish) == 0);
+
+	i = finish.tv_sec - start.tv_sec;
+	i *= 1000000000;
+	i += finish.tv_nsec - start.tv_nsec;
+
+	printf("%lu.%09lu - %lu.%09lu = %llu\n",
+		finish.tv_sec, finish.tv_nsec,
+		start.tv_sec, start.tv_nsec,
+		i);
+
+	return i;
+}
+
+unsigned long long calibrate(void)
+{
+	unsigned long long i;
+
+	printf("Calibrating reasonable sample size...\n");
+
+	for (i = 5; ; i++) {
+		unsigned long long samples = 1 << i;
+
+		/* Find something that takes more than 5 seconds to run. */
+		if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+			return samples;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	unsigned long long samples;
+	unsigned long long native, filtered;
+
+	if (argc > 1)
+		samples = strtoull(argv[1], NULL, 0);
+	else
+		samples = calibrate();
+
+	printf("Benchmarking %llu samples...\n", samples);
+
+	native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid native: %llu ns\n", native);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	assert(ret == 0);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	assert(ret == 0);
+
+	filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+	printf("Estimated seccomp overhead per syscall: %llu ns\n",
+		filtered - native);
+
+	if (filtered == native)
+		printf("Trying running again with more samples.\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 4d6f92a..67c3e27 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -68,17 +68,7 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
-#ifndef SECCOMP_RET_KILL
-#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
-#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
-
-/* Masks for the return value sections. */
-#define SECCOMP_RET_ACTION      0x7fff0000U
-#define SECCOMP_RET_DATA        0x0000ffffU
-
+#ifndef SECCOMP_RET_ALLOW
 struct seccomp_data {
 	int nr;
 	__u32 arch;
@@ -87,6 +77,70 @@ struct seccomp_data {
 };
 #endif
 
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#endif
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
+#endif
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+#  define __NR_seccomp 354
+# elif defined(__x86_64__)
+#  define __NR_seccomp 317
+# elif defined(__arm__)
+#  define __NR_seccomp 383
+# elif defined(__aarch64__)
+#  define __NR_seccomp 277
+# elif defined(__hppa__)
+#  define __NR_seccomp 338
+# elif defined(__powerpc__)
+#  define __NR_seccomp 358
+# elif defined(__s390__)
+#  define __NR_seccomp 348
+# else
+#  warning "seccomp syscall number unknown for this architecture"
+#  define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
 #elif __BYTE_ORDER == __BIG_ENDIAN
@@ -136,7 +190,7 @@ TEST(no_new_privs_support)
 	}
 }
 
-/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 TEST(mode_filter_support)
 {
 	long ret;
@@ -342,6 +396,28 @@ TEST(empty_prog)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+TEST(log_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	/* getppid() should succeed and be logged (no check for logging) */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 {
 	struct sock_filter filter[] = {
@@ -520,6 +596,117 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 	close(fd);
 }
 
+/* This is a thread task to die via seccomp filter violation. */
+void *kill_thread(void *data)
+{
+	bool die = (bool)data;
+
+	if (die) {
+		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+		return (void *)SIBLING_EXIT_FAILURE;
+	}
+
+	return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+	pthread_t thread;
+	void *status;
+	/* Kill only when calling __NR_prctl. */
+	struct sock_filter filter_thread[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_thread = {
+		.len = (unsigned short)ARRAY_SIZE(filter_thread),
+		.filter = filter_thread,
+	};
+	struct sock_filter filter_process[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_process = {
+		.len = (unsigned short)ARRAY_SIZE(filter_process),
+		.filter = filter_process,
+	};
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+			     kill_process ? &prog_process : &prog_thread));
+
+	/*
+	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+	 * flag cannot be downgraded by a new filter.
+	 */
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+	/* Start a thread that will exit immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+	/* Start a thread that will die immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+	/*
+	 * If we get here, only the spawned thread died. Let the parent know
+	 * the whole process didn't die (i.e. this thread, the spawner,
+	 * stayed running).
+	 */
+	exit(42);
+}
+
+TEST(KILL_thread)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, false);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If only the thread was killed, we'll see exit 42. */
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, true);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If the entire process was killed, we'll see SIGSYS. */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
 TEST(arg_out_of_range)
 {
@@ -541,26 +728,30 @@ TEST(arg_out_of_range)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+#define ERRNO_FILTER(name, errno)					\
+	struct sock_filter _read_filter_##name[] = {			\
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
+			offsetof(struct seccomp_data, nr)),		\
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
+	};								\
+	struct sock_fprog prog_##name = {				\
+		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
+		.filter = _read_filter_##name,				\
+	}
+
+/* Make sure basic errno values are correctly passed through a filter. */
 TEST(ERRNO_valid)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(valid, E2BIG);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -568,26 +759,17 @@ TEST(ERRNO_valid)
 	EXPECT_EQ(E2BIG, errno);
 }
 
+/* Make sure an errno of zero is correctly handled by the arch code. */
 TEST(ERRNO_zero)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(zero, 0);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -595,26 +777,21 @@ TEST(ERRNO_zero)
 	EXPECT_EQ(0, read(0, NULL, 0));
 }
 
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
 TEST(ERRNO_capped)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(capped, 4096);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -622,6 +799,37 @@ TEST(ERRNO_capped)
 	EXPECT_EQ(4095, errno);
 }
 
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+	ERRNO_FILTER(first,  11);
+	ERRNO_FILTER(second, 13);
+	ERRNO_FILTER(third,  12);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(12, errno);
+}
+
 FIXTURE_DATA(TRAP) {
 	struct sock_fprog prog;
 };
@@ -735,6 +943,7 @@ TEST_F(TRAP, handler)
 
 FIXTURE_DATA(precedence) {
 	struct sock_fprog allow;
+	struct sock_fprog log;
 	struct sock_fprog trace;
 	struct sock_fprog error;
 	struct sock_fprog trap;
@@ -746,6 +955,13 @@ FIXTURE_SETUP(precedence)
 	struct sock_filter allow_insns[] = {
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
+	struct sock_filter log_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
 	struct sock_filter trace_insns[] = {
 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
@@ -782,6 +998,7 @@ FIXTURE_SETUP(precedence)
 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
 	FILTER_ALLOC(allow);
+	FILTER_ALLOC(log);
 	FILTER_ALLOC(trace);
 	FILTER_ALLOC(error);
 	FILTER_ALLOC(trap);
@@ -792,6 +1009,7 @@ FIXTURE_TEARDOWN(precedence)
 {
 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
 	FILTER_FREE(allow);
+	FILTER_FREE(log);
 	FILTER_FREE(trace);
 	FILTER_FREE(error);
 	FILTER_FREE(trap);
@@ -809,6 +1027,8 @@ TEST_F(precedence, allow_ok)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -833,6 +1053,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -864,6 +1086,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
@@ -885,6 +1109,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -910,6 +1136,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -931,6 +1159,8 @@ TEST_F(precedence, errno_is_third)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -949,6 +1179,8 @@ TEST_F(precedence, errno_is_third_in_any_order)
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
@@ -971,6 +1203,8 @@ TEST_F(precedence, trace_is_fourth)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
@@ -992,12 +1226,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
 	EXPECT_EQ(parent, syscall(__NR_getppid));
 	/* No ptracer */
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 }
 
+TEST_F(precedence, log_is_fifth)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
 #ifndef PTRACE_O_TRACESECCOMP
 #define PTRACE_O_TRACESECCOMP	0x00000080
 #endif
@@ -1262,6 +1538,13 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
 
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(val, action)
+#endif
+
 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  */
@@ -1357,7 +1640,7 @@ void change_syscall(struct __test_metadata *_metadata,
 #ifdef SYSCALL_NUM_RET_SHARE_REG
 		TH_LOG("Can't modify syscall return on this architecture");
 #else
-		regs.SYSCALL_RET = 1;
+		regs.SYSCALL_RET = EPERM;
 #endif
 
 #ifdef HAVE_GETREGS
@@ -1426,6 +1709,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 
 	if (nr == __NR_getpid)
 		change_syscall(_metadata, tracee, __NR_getppid);
+	if (nr == __NR_open)
+		change_syscall(_metadata, tracee, -1);
 }
 
 FIXTURE_DATA(TRACE_syscall) {
@@ -1480,6 +1765,28 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 		free(self->prog.filter);
 }
 
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer will redirect getpid to getppid. */
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_dropped)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer should skip the open syscall, resulting in EPERM. */
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
+}
+
 TEST_F(TRACE_syscall, syscall_allowed)
 {
 	long ret;
@@ -1520,13 +1827,8 @@ TEST_F(TRACE_syscall, syscall_dropped)
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
 	ASSERT_EQ(0, ret);
 
-#ifdef SYSCALL_NUM_RET_SHARE_REG
-	/* gettid has been skipped */
-	EXPECT_EQ(-1, syscall(__NR_gettid));
-#else
 	/* gettid has been skipped and an altered return value stored. */
-	EXPECT_EQ(1, syscall(__NR_gettid));
-#endif
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
 }
 
@@ -1557,6 +1859,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
 	ASSERT_EQ(0, ret);
 
 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+	errno = 0;
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 	EXPECT_EQ(EPERM, errno);
 }
@@ -1654,47 +1957,6 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-#ifndef __NR_seccomp
-# if defined(__i386__)
-#  define __NR_seccomp 354
-# elif defined(__x86_64__)
-#  define __NR_seccomp 317
-# elif defined(__arm__)
-#  define __NR_seccomp 383
-# elif defined(__aarch64__)
-#  define __NR_seccomp 277
-# elif defined(__hppa__)
-#  define __NR_seccomp 338
-# elif defined(__powerpc__)
-#  define __NR_seccomp 358
-# elif defined(__s390__)
-#  define __NR_seccomp 348
-# else
-#  warning "seccomp syscall number unknown for this architecture"
-#  define __NR_seccomp 0xffff
-# endif
-#endif
-
-#ifndef SECCOMP_SET_MODE_STRICT
-#define SECCOMP_SET_MODE_STRICT 0
-#endif
-
-#ifndef SECCOMP_SET_MODE_FILTER
-#define SECCOMP_SET_MODE_FILTER 1
-#endif
-
-#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
-#endif
-
-#ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, void *args)
-{
-	errno = 0;
-	return syscall(__NR_seccomp, op, flags, args);
-}
-#endif
-
 TEST(seccomp_syscall)
 {
 	struct sock_filter filter[] = {
@@ -1783,6 +2045,67 @@ TEST(seccomp_syscall_mode_lock)
 	}
 }
 
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_LOG };
+	unsigned int flag, all_flags;
+	int i;
+	long ret;
+
+	/* Test detection of known-good filter flags */
+	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		flag = flags[i];
+		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+		ASSERT_NE(ENOSYS, errno) {
+			TH_LOG("Kernel does not support seccomp syscall!");
+		}
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EFAULT, errno) {
+			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+			       flag);
+		}
+
+		all_flags |= flag;
+	}
+
+	/* Test detection of all known-good filter flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+		       all_flags);
+	}
+
+	/* Test detection of an unknown filter flag */
+	flag = -1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+		       flag);
+	}
+
+	/*
+	 * Test detection of an unknown filter flag that may simply need to be
+	 * added to this test
+	 */
+	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+		       flag);
+	}
+}
+
 TEST(TSYNC_first)
 {
 	struct sock_filter filter[] = {
@@ -2421,6 +2744,99 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+	struct sock_filter allow_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter kill_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog allow_prog = {
+		.len = (unsigned short)ARRAY_SIZE(allow_filter),
+		.filter = allow_filter,
+	};
+	struct sock_fprog kill_prog = {
+		.len = (unsigned short)ARRAY_SIZE(kill_filter),
+		.filter = kill_filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_NE(0, ret) {
+		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+	}
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+	}
+
+	/* Verify that a simple, permissive filter can be added with no flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+	EXPECT_EQ(0, ret);
+
+	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+	}
+	EXPECT_EQ(0, ret);
+
+	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &kill_prog);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST(get_action_avail)
+{
+	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
+			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
+	__u32 unknown_action = 0x10000000U;
+	int i;
+	long ret;
+
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+	}
+	EXPECT_EQ(ret, 0);
+
+	for (i = 0; i < ARRAY_SIZE(actions); i++) {
+		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+		EXPECT_EQ(ret, 0) {
+			TH_LOG("Expected action (0x%X) not available!",
+			       actions[i]);
+		}
+	}
+
+	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+	EXPECT_EQ(ret, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
 /*
  * TODO:
  * - add microbenchmarks
@@ -2429,6 +2845,8 @@ TEST(syscall_restart)
  * - endianness checking when appropriate
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
  * - ...
  */
author	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-22 16:16:41 -1000
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-09-22 16:16:41 -1000
commit	c0a3a64e723324ae6dda53214061a71de63808c3 (patch)
tree	f148333ecf690df602faaf389fb484d4561c6bc4 /tools
parent	69c902f597c4bec92013a526268620fb6255c24a (diff)
parent	6849243bf4c6155151b294e9f0e0dc9540d6f083 (diff)
download	op-kernel-dev-c0a3a64e723324ae6dda53214061a71de63808c3.zip op-kernel-dev-c0a3a64e723324ae6dda53214061a71de63808c3.tar.gz