From 8ac270d1e29f0428228ab2b9a8ae5e1ed4a5cd84 Mon Sep 17 00:00:00 2001 From: Will Drewry Date: Thu, 12 Apr 2012 16:48:04 -0500 Subject: Documentation: prctl/seccomp_filter Documents how system call filtering using Berkeley Packet Filter programs works and how it may be used. Includes an example for x86 and a semi-generic example using a macro-based code generator. Acked-by: Eric Paris Signed-off-by: Will Drewry Acked-by: Kees Cook v18: - added acked by - update no new privs numbers v17: - remove @compat note and add Pitfalls section for arch checking (keescook@chromium.org) v16: - v15: - v14: - rebase/nochanges v13: - rebase on to 88ebdda6159ffc15699f204c33feb3e431bf9bdc v12: - comment on the ptrace_event use - update arch support comment - note the behavior of SECCOMP_RET_DATA when there are multiple filters (keescook@chromium.org) - lots of samples/ clean up incl 64-bit bpf-direct support (markus@chromium.org) - rebase to linux-next v11: - overhaul return value language, updates (keescook@chromium.org) - comment on do_exit(SIGSYS) v10: - update for SIGSYS - update for new seccomp_data layout - update for ptrace option use v9: - updated bpf-direct.c for SIGILL v8: - add PR_SET_NO_NEW_PRIVS to the samples. v7: - updated for all the new stuff in v7: TRAP, TRACE - only talk about PR_SET_SECCOMP now - fixed bad JLE32 check (coreyb@linux.vnet.ibm.com) - adds dropper.c: a simple system call disabler v6: - tweak the language to note the requirement of PR_SET_NO_NEW_PRIVS being called prior to use. (luto@mit.edu) v5: - update sample to use system call arguments - adds a "fancy" example using a macro-based generator - cleaned up bpf in the sample - update docs to mention arguments - fix prctl value (eparis@redhat.com) - language cleanup (rdunlap@xenotime.net) v4: - update for no_new_privs use - minor tweaks v3: - call out BPF <-> Berkeley Packet Filter (rdunlap@xenotime.net) - document use of tentative always-unprivileged - guard sample compilation for i386 and x86_64 v2: - move code to samples (corbet@lwn.net) Signed-off-by: James Morris --- samples/seccomp/bpf-helper.h | 238 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 samples/seccomp/bpf-helper.h (limited to 'samples/seccomp/bpf-helper.h') diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h new file mode 100644 index 0000000..643279d --- /dev/null +++ b/samples/seccomp/bpf-helper.h @@ -0,0 +1,238 @@ +/* + * Example wrapper around BPF macros. + * + * Copyright (c) 2012 The Chromium OS Authors + * Author: Will Drewry + * + * The code may be used by anyone for any purpose, + * and can serve as a starting point for developing + * applications using prctl(PR_SET_SECCOMP, 2, ...). + * + * No guarantees are provided with respect to the correctness + * or functionality of this code. + */ +#ifndef __BPF_HELPER_H__ +#define __BPF_HELPER_H__ + +#include /* for __BITS_PER_LONG */ +#include +#include +#include /* for seccomp_data */ +#include +#include +#include + +#define BPF_LABELS_MAX 256 +struct bpf_labels { + int count; + struct __bpf_label { + const char *label; + __u32 location; + } labels[BPF_LABELS_MAX]; +}; + +int bpf_resolve_jumps(struct bpf_labels *labels, + struct sock_filter *filter, size_t count); +__u32 seccomp_bpf_label(struct bpf_labels *labels, const char *label); +void seccomp_bpf_print(struct sock_filter *filter, size_t count); + +#define JUMP_JT 0xff +#define JUMP_JF 0xff +#define LABEL_JT 0xfe +#define LABEL_JF 0xfe + +#define ALLOW \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) +#define DENY \ + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) +#define JUMP(labels, label) \ + BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \ + JUMP_JT, JUMP_JF) +#define LABEL(labels, label) \ + BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \ + LABEL_JT, LABEL_JF) +#define SYSCALL(nr, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (nr), 0, 1), \ + jt + +/* Lame, but just an example */ +#define FIND_LABEL(labels, label) seccomp_bpf_label((labels), #label) + +#define EXPAND(...) __VA_ARGS__ +/* Map all width-sensitive operations */ +#if __BITS_PER_LONG == 32 + +#define JEQ(x, jt) JEQ32(x, EXPAND(jt)) +#define JNE(x, jt) JNE32(x, EXPAND(jt)) +#define JGT(x, jt) JGT32(x, EXPAND(jt)) +#define JLT(x, jt) JLT32(x, EXPAND(jt)) +#define JGE(x, jt) JGE32(x, EXPAND(jt)) +#define JLE(x, jt) JLE32(x, EXPAND(jt)) +#define JA(x, jt) JA32(x, EXPAND(jt)) +#define ARG(i) ARG_32(i) +#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + +#elif __BITS_PER_LONG == 64 + +/* Ensure that we load the logically correct offset. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define ENDIAN(_lo, _hi) _lo, _hi +#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) +#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define ENDIAN(_lo, _hi) _hi, _lo +#define LO_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) + sizeof(__u32) +#define HI_ARG(idx) offsetof(struct seccomp_data, args[(idx)]) +#else +#error "Unknown endianness" +#endif + +union arg64 { + struct { + __u32 ENDIAN(lo32, hi32); + }; + __u64 u64; +}; + +#define JEQ(x, jt) \ + JEQ64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JGT(x, jt) \ + JGT64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JGE(x, jt) \ + JGE64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JNE(x, jt) \ + JNE64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JLT(x, jt) \ + JLT64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define JLE(x, jt) \ + JLE64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) + +#define JA(x, jt) \ + JA64(((union arg64){.u64 = (x)}).lo32, \ + ((union arg64){.u64 = (x)}).hi32, \ + EXPAND(jt)) +#define ARG(i) ARG_64(i) + +#else +#error __BITS_PER_LONG value unusable. +#endif + +/* Loads the arg into A */ +#define ARG_32(idx) \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)) + +/* Loads hi into A and lo in X */ +#define ARG_64(idx) \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, LO_ARG(idx)), \ + BPF_STMT(BPF_ST, 0), /* lo -> M[0] */ \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, HI_ARG(idx)), \ + BPF_STMT(BPF_ST, 1) /* hi -> M[1] */ + +#define JEQ32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 0, 1), \ + jt + +#define JNE32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (value), 1, 0), \ + jt + +/* Checks the lo, then swaps to check the hi. A=lo,X=hi */ +#define JEQ64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JNE64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 5, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JA32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (value), 0, 1), \ + jt + +#define JA64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (hi), 3, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JSET+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JGE32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 0, 1), \ + jt + +#define JLT32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (value), 1, 0), \ + jt + +/* Shortcut checking if hi > arg.hi. */ +#define JGE64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JLT64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGE+BPF_K, (hi), 0, 4), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JGT32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 0, 1), \ + jt + +#define JLE32(value, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (value), 1, 0), \ + jt + +/* Check hi > args.hi first, then do the GE checking */ +#define JGT64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 4, 0), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 5), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 0, 2), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define JLE64(lo, hi, jt) \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (hi), 6, 0), \ + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (hi), 0, 3), \ + BPF_STMT(BPF_LD+BPF_MEM, 0), /* swap in lo */ \ + BPF_JUMP(BPF_JMP+BPF_JGT+BPF_K, (lo), 2, 0), \ + BPF_STMT(BPF_LD+BPF_MEM, 1), /* passed: swap hi back in */ \ + jt, \ + BPF_STMT(BPF_LD+BPF_MEM, 1) /* failed: swap hi back in */ + +#define LOAD_SYSCALL_NR \ + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ + offsetof(struct seccomp_data, nr)) + +#endif /* __BPF_HELPER_H__ */ -- cgit v1.1