summaryrefslogtreecommitdiffstats
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/cgroup.c15
-rw-r--r--kernel/bpf/core.c4
-rw-r--r--kernel/bpf/disasm.c65
-rw-r--r--kernel/bpf/disasm.h29
-rw-r--r--kernel/bpf/syscall.c93
-rw-r--r--kernel/bpf/verifier.c126
6 files changed, 283 insertions, 49 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index b789ab7..c1c0b60 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -568,6 +568,8 @@ static bool cgroup_dev_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
if (type == BPF_WRITE)
return false;
@@ -576,8 +578,17 @@ static bool cgroup_dev_is_valid_access(int off, int size,
/* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
- return false;
+
+ switch (off) {
+ case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
+ bpf_ctx_record_field_size(info, size_default);
+ if (!bpf_ctx_narrow_access_ok(off, size, size_default))
+ return false;
+ break;
+ default:
+ if (size != size_default)
+ return false;
+ }
return true;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 768e0a0..70a5345 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -771,7 +771,9 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
/* Base function for offset calculation. Needs to go into .text section,
* therefore keeping it non-static as well; will also be used by JITs
- * anyway later on, so do not let the compiler omit it.
+ * anyway later on, so do not let the compiler omit it. This also needs
+ * to go into kallsyms for correlation from e.g. bpftool, so naming
+ * must not change.
*/
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
index 883f88f..8740406 100644
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@ -21,10 +21,39 @@ static const char * const func_id_str[] = {
};
#undef __BPF_FUNC_STR_FN
-const char *func_id_name(int id)
+static const char *__func_get_name(const struct bpf_insn_cbs *cbs,
+ const struct bpf_insn *insn,
+ char *buff, size_t len)
{
BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
+ if (insn->src_reg != BPF_PSEUDO_CALL &&
+ insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID &&
+ func_id_str[insn->imm])
+ return func_id_str[insn->imm];
+
+ if (cbs && cbs->cb_call)
+ return cbs->cb_call(cbs->private_data, insn);
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ snprintf(buff, len, "%+d", insn->imm);
+
+ return buff;
+}
+
+static const char *__func_imm_name(const struct bpf_insn_cbs *cbs,
+ const struct bpf_insn *insn,
+ u64 full_imm, char *buff, size_t len)
+{
+ if (cbs && cbs->cb_imm)
+ return cbs->cb_imm(cbs->private_data, insn, full_imm);
+
+ snprintf(buff, len, "0x%llx", (unsigned long long)full_imm);
+ return buff;
+}
+
+const char *func_id_name(int id)
+{
if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id])
return func_id_str[id];
else
@@ -83,7 +112,7 @@ static const char *const bpf_jmp_string[16] = {
[BPF_EXIT >> 4] = "exit",
};
-static void print_bpf_end_insn(bpf_insn_print_cb verbose,
+static void print_bpf_end_insn(bpf_insn_print_t verbose,
struct bpf_verifier_env *env,
const struct bpf_insn *insn)
{
@@ -92,9 +121,12 @@ static void print_bpf_end_insn(bpf_insn_print_cb verbose,
insn->imm, insn->dst_reg);
}
-void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
- const struct bpf_insn *insn, bool allow_ptr_leaks)
+void print_bpf_insn(const struct bpf_insn_cbs *cbs,
+ struct bpf_verifier_env *env,
+ const struct bpf_insn *insn,
+ bool allow_ptr_leaks)
{
+ const bpf_insn_print_t verbose = cbs->cb_print;
u8 class = BPF_CLASS(insn->code);
if (class == BPF_ALU || class == BPF_ALU64) {
@@ -175,12 +207,15 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
*/
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+ char tmp[64];
if (map_ptr && !allow_ptr_leaks)
imm = 0;
- verbose(env, "(%02x) r%d = 0x%llx\n", insn->code,
- insn->dst_reg, (unsigned long long)imm);
+ verbose(env, "(%02x) r%d = %s\n",
+ insn->code, insn->dst_reg,
+ __func_imm_name(cbs, insn, imm,
+ tmp, sizeof(tmp)));
} else {
verbose(env, "BUG_ld_%02x\n", insn->code);
return;
@@ -189,12 +224,20 @@ void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
u8 opcode = BPF_OP(insn->code);
if (opcode == BPF_CALL) {
- if (insn->src_reg == BPF_PSEUDO_CALL)
- verbose(env, "(%02x) call pc%+d\n", insn->code,
- insn->imm);
- else
+ char tmp[64];
+
+ if (insn->src_reg == BPF_PSEUDO_CALL) {
+ verbose(env, "(%02x) call pc%s\n",
+ insn->code,
+ __func_get_name(cbs, insn,
+ tmp, sizeof(tmp)));
+ } else {
+ strcpy(tmp, "unknown");
verbose(env, "(%02x) call %s#%d\n", insn->code,
- func_id_name(insn->imm), insn->imm);
+ __func_get_name(cbs, insn,
+ tmp, sizeof(tmp)),
+ insn->imm);
+ }
} else if (insn->code == (BPF_JMP | BPF_JA)) {
verbose(env, "(%02x) goto pc%+d\n",
insn->code, insn->off);
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
index 8de977e..e0857d0 100644
--- a/kernel/bpf/disasm.h
+++ b/kernel/bpf/disasm.h
@@ -17,16 +17,35 @@
#include <linux/bpf.h>
#include <linux/kernel.h>
#include <linux/stringify.h>
+#ifndef __KERNEL__
+#include <stdio.h>
+#include <string.h>
+#endif
+
+struct bpf_verifier_env;
extern const char *const bpf_alu_string[16];
extern const char *const bpf_class_string[8];
const char *func_id_name(int id);
-struct bpf_verifier_env;
-typedef void (*bpf_insn_print_cb)(struct bpf_verifier_env *env,
- const char *, ...);
-void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
- const struct bpf_insn *insn, bool allow_ptr_leaks);
+typedef void (*bpf_insn_print_t)(struct bpf_verifier_env *env,
+ const char *, ...);
+typedef const char *(*bpf_insn_revmap_call_t)(void *private_data,
+ const struct bpf_insn *insn);
+typedef const char *(*bpf_insn_print_imm_t)(void *private_data,
+ const struct bpf_insn *insn,
+ __u64 full_imm);
+
+struct bpf_insn_cbs {
+ bpf_insn_print_t cb_print;
+ bpf_insn_revmap_call_t cb_call;
+ bpf_insn_print_imm_t cb_imm;
+ void *private_data;
+};
+void print_bpf_insn(const struct bpf_insn_cbs *cbs,
+ struct bpf_verifier_env *env,
+ const struct bpf_insn *insn,
+ bool allow_ptr_leaks);
#endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e2e1c78..007802c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -937,10 +937,16 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ int i;
+
trace_bpf_prog_put_rcu(prog);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
+
+ for (i = 0; i < prog->aux->func_cnt; i++)
+ bpf_prog_kallsyms_del(prog->aux->func[i]);
bpf_prog_kallsyms_del(prog);
+
call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
}
@@ -1552,6 +1558,67 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
return fd;
}
+static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
+ unsigned long addr)
+{
+ int i;
+
+ for (i = 0; i < prog->aux->used_map_cnt; i++)
+ if (prog->aux->used_maps[i] == (void *)addr)
+ return prog->aux->used_maps[i];
+ return NULL;
+}
+
+static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
+{
+ const struct bpf_map *map;
+ struct bpf_insn *insns;
+ u64 imm;
+ int i;
+
+ insns = kmemdup(prog->insnsi, bpf_prog_insn_size(prog),
+ GFP_USER);
+ if (!insns)
+ return insns;
+
+ for (i = 0; i < prog->len; i++) {
+ if (insns[i].code == (BPF_JMP | BPF_TAIL_CALL)) {
+ insns[i].code = BPF_JMP | BPF_CALL;
+ insns[i].imm = BPF_FUNC_tail_call;
+ /* fall-through */
+ }
+ if (insns[i].code == (BPF_JMP | BPF_CALL) ||
+ insns[i].code == (BPF_JMP | BPF_CALL_ARGS)) {
+ if (insns[i].code == (BPF_JMP | BPF_CALL_ARGS))
+ insns[i].code = BPF_JMP | BPF_CALL;
+ if (!bpf_dump_raw_ok())
+ insns[i].imm = 0;
+ continue;
+ }
+
+ if (insns[i].code != (BPF_LD | BPF_IMM | BPF_DW))
+ continue;
+
+ imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm;
+ map = bpf_map_from_imm(prog, imm);
+ if (map) {
+ insns[i].src_reg = BPF_PSEUDO_MAP_FD;
+ insns[i].imm = map->id;
+ insns[i + 1].imm = 0;
+ continue;
+ }
+
+ if (!bpf_dump_raw_ok() &&
+ imm == (unsigned long)prog->aux) {
+ insns[i].imm = 0;
+ insns[i + 1].imm = 0;
+ continue;
+ }
+ }
+
+ return insns;
+}
+
static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
@@ -1602,18 +1669,34 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
ulen = info.jited_prog_len;
info.jited_prog_len = prog->jited_len;
if (info.jited_prog_len && ulen) {
- uinsns = u64_to_user_ptr(info.jited_prog_insns);
- ulen = min_t(u32, info.jited_prog_len, ulen);
- if (copy_to_user(uinsns, prog->bpf_func, ulen))
- return -EFAULT;
+ if (bpf_dump_raw_ok()) {
+ uinsns = u64_to_user_ptr(info.jited_prog_insns);
+ ulen = min_t(u32, info.jited_prog_len, ulen);
+ if (copy_to_user(uinsns, prog->bpf_func, ulen))
+ return -EFAULT;
+ } else {
+ info.jited_prog_insns = 0;
+ }
}
ulen = info.xlated_prog_len;
info.xlated_prog_len = bpf_prog_insn_size(prog);
if (info.xlated_prog_len && ulen) {
+ struct bpf_insn *insns_sanitized;
+ bool fault;
+
+ if (prog->blinded && !bpf_dump_raw_ok()) {
+ info.xlated_prog_insns = 0;
+ goto done;
+ }
+ insns_sanitized = bpf_insn_prepare_dump(prog);
+ if (!insns_sanitized)
+ return -ENOMEM;
uinsns = u64_to_user_ptr(info.xlated_prog_insns);
ulen = min_t(u32, info.xlated_prog_len, ulen);
- if (copy_to_user(uinsns, prog->insnsi, ulen))
+ fault = copy_to_user(uinsns, insns_sanitized, ulen);
+ kfree(insns_sanitized);
+ if (fault)
return -EFAULT;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1cd2c2d..98d8637 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -772,7 +772,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
return -EPERM;
}
if (bpf_prog_is_dev_bound(env->prog->aux)) {
- verbose(env, "funcation calls in offloaded programs are not supported yet\n");
+ verbose(env, "function calls in offloaded programs are not supported yet\n");
return -EINVAL;
}
ret = add_subprog(env, i + insn[i].imm + 1);
@@ -823,6 +823,7 @@ next:
return 0;
}
+static
struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
const struct bpf_verifier_state *state,
struct bpf_verifier_state *parent,
@@ -867,7 +868,7 @@ bug:
verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
verbose(env, "regno %d parent frame %d current frame %d\n",
regno, parent->curframe, state->curframe);
- return 0;
+ return NULL;
}
static int mark_reg_read(struct bpf_verifier_env *env,
@@ -1434,33 +1435,80 @@ static int update_stack_depth(struct bpf_verifier_env *env,
const struct bpf_func_state *func,
int off)
{
- u16 stack = env->subprog_stack_depth[func->subprogno], total = 0;
- struct bpf_verifier_state *cur = env->cur_state;
- int i;
+ u16 stack = env->subprog_stack_depth[func->subprogno];
if (stack >= -off)
return 0;
/* update known max for given subprogram */
env->subprog_stack_depth[func->subprogno] = -off;
+ return 0;
+}
- /* compute the total for current call chain */
- for (i = 0; i <= cur->curframe; i++) {
- u32 depth = env->subprog_stack_depth[cur->frame[i]->subprogno];
-
- /* round up to 32-bytes, since this is granularity
- * of interpreter stack sizes
- */
- depth = round_up(depth, 32);
- total += depth;
- }
+/* starting from main bpf function walk all instructions of the function
+ * and recursively walk all callees that given function can call.
+ * Ignore jump and exit insns.
+ * Since recursion is prevented by check_cfg() this algorithm
+ * only needs a local stack of MAX_CALL_FRAMES to remember callsites
+ */
+static int check_max_stack_depth(struct bpf_verifier_env *env)
+{
+ int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ int ret_insn[MAX_CALL_FRAMES];
+ int ret_prog[MAX_CALL_FRAMES];
- if (total > MAX_BPF_STACK) {
+process_func:
+ /* round up to 32-bytes, since this is granularity
+ * of interpreter stack size
+ */
+ depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
+ if (depth > MAX_BPF_STACK) {
verbose(env, "combined stack size of %d calls is %d. Too large\n",
- cur->curframe, total);
+ frame + 1, depth);
return -EACCES;
}
- return 0;
+continue_func:
+ if (env->subprog_cnt == subprog)
+ subprog_end = insn_cnt;
+ else
+ subprog_end = env->subprog_starts[subprog];
+ for (; i < subprog_end; i++) {
+ if (insn[i].code != (BPF_JMP | BPF_CALL))
+ continue;
+ if (insn[i].src_reg != BPF_PSEUDO_CALL)
+ continue;
+ /* remember insn and function to return to */
+ ret_insn[frame] = i + 1;
+ ret_prog[frame] = subprog;
+
+ /* find the callee */
+ i = i + insn[i].imm + 1;
+ subprog = find_subprog(env, i);
+ if (subprog < 0) {
+ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ i);
+ return -EFAULT;
+ }
+ subprog++;
+ frame++;
+ if (frame >= MAX_CALL_FRAMES) {
+ WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
+ return -EFAULT;
+ }
+ goto process_func;
+ }
+ /* end of for() loop means the last insn of the 'subprog'
+ * was reached. Doesn't matter whether it was JA or EXIT
+ */
+ if (frame == 0)
+ return 0;
+ depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
+ frame--;
+ i = ret_insn[frame];
+ subprog = ret_prog[frame];
+ goto continue_func;
}
static int get_callee_stack_depth(struct bpf_verifier_env *env,
@@ -2105,9 +2153,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_func_state *caller, *callee;
int i, subprog, target_insn;
- if (state->curframe >= MAX_CALL_FRAMES) {
+ if (state->curframe + 1 >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep\n",
- state->curframe);
+ state->curframe + 2);
return -E2BIG;
}
@@ -4155,7 +4203,7 @@ static bool stacksafe(struct bpf_func_state *old,
if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
/* explored state didn't use this */
- return true;
+ continue;
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;
@@ -4475,9 +4523,12 @@ static int do_check(struct bpf_verifier_env *env)
}
if (env->log.level) {
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = verbose,
+ };
+
verbose(env, "%d: ", insn_idx);
- print_bpf_insn(verbose, env, insn,
- env->allow_ptr_leaks);
+ print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
}
err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
@@ -5065,14 +5116,14 @@ static int jit_subprogs(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog, **func, *tmp;
int i, j, subprog_start, subprog_end = 0, len, subprog;
- struct bpf_insn *insn = prog->insnsi;
+ struct bpf_insn *insn;
void *old_bpf_func;
int err = -ENOMEM;
if (env->subprog_cnt == 0)
return 0;
- for (i = 0; i < prog->len; i++, insn++) {
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
continue;
@@ -5111,7 +5162,10 @@ static int jit_subprogs(struct bpf_verifier_env *env)
goto out_free;
memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
len * sizeof(struct bpf_insn));
+ func[i]->type = prog->type;
func[i]->len = len;
+ if (bpf_prog_calc_tag(func[i]))
+ goto out_free;
func[i]->is_func = 1;
/* Use bpf_prog_F_tag to indicate functions in stack traces.
* Long term would need debug info to populate names
@@ -5161,6 +5215,25 @@ static int jit_subprogs(struct bpf_verifier_env *env)
bpf_prog_lock_ro(func[i]);
bpf_prog_kallsyms_add(func[i]);
}
+
+ /* Last step: make now unused interpreter insns from main
+ * prog consistent for later dump requests, so they can
+ * later look the same as if they were interpreted only.
+ */
+ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ unsigned long addr;
+
+ if (insn->code != (BPF_JMP | BPF_CALL) ||
+ insn->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ insn->off = env->insn_aux_data[i].call_imm;
+ subprog = find_subprog(env, i + insn->off + 1);
+ addr = (unsigned long)func[subprog + 1]->bpf_func;
+ addr &= PAGE_MASK;
+ insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
+ addr - __bpf_call_base;
+ }
+
prog->jited = 1;
prog->bpf_func = func[0]->bpf_func;
prog->aux->func = func;
@@ -5427,6 +5500,9 @@ skip_full_check:
sanitize_dead_code(env);
if (ret == 0)
+ ret = check_max_stack_depth(env);
+
+ if (ret == 0)
/* program is valid, convert *(u32*)(ctx + off) accesses */
ret = convert_ctx_accesses(env);
OpenPOWER on IntegriCloud