summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-04-25 22:55:33 -0400
committerDavid S. Miller <davem@davemloft.net>2018-04-25 22:55:33 -0400
commit25eb0ea7174c6e84f21fa59dccbddd0318b17b12 (patch)
tree07ecdbd7810e448e3b744c0ef143480a6c4e129d
parent91a825290ca4eae88603bc811bf74a45f94a3f46 (diff)
parent9c299a32ede98dc9faafb267034ed830a15304db (diff)
downloadop-kernel-dev-25eb0ea7174c6e84f21fa59dccbddd0318b17b12.zip
op-kernel-dev-25eb0ea7174c6e84f21fa59dccbddd0318b17b12.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says: ==================== pull-request: bpf 2018-04-25 The following pull-request contains BPF updates for your *net* tree. The main changes are: 1) Fix to clear the percpu metadata_dst that could otherwise carry stale ip_tunnel_info, from William. 2) Fix that reduces the number of passes in x64 JIT with regards to dead code sanitation to avoid risk of prog rejection, from Gianluca. 3) Several fixes of sockmap programs, besides others, fixing a double page_put() in error path, missing refcount hold for pinned sockmap, adding required -target bpf for clang in sample Makefile, from John. 4) Fix to disable preemption in __BPF_PROG_RUN_ARRAY() paths, from Roman. 5) Fix tools/bpf/ Makefile with regards to a lex/yacc build error seen on older gcc-5, from John. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/bpf/bpf_devel_QA.txt10
-rw-r--r--arch/x86/net/bpf_jit_comp.c12
-rw-r--r--include/linux/bpf.h4
-rw-r--r--kernel/bpf/arraymap.c3
-rw-r--r--kernel/bpf/sockmap.c51
-rw-r--r--kernel/bpf/syscall.c4
-rw-r--r--net/core/filter.c1
-rw-r--r--samples/sockmap/Makefile7
-rw-r--r--tools/bpf/Makefile2
9 files changed, 82 insertions, 12 deletions
diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt
index 1a0b704..da57601 100644
--- a/Documentation/bpf/bpf_devel_QA.txt
+++ b/Documentation/bpf/bpf_devel_QA.txt
@@ -557,6 +557,14 @@ A: Although LLVM IR generation and optimization try to stay architecture
pulls in some header files containing file scope host assembly codes.
- You can add "-fno-jump-tables" to work around the switch table issue.
- Otherwise, you can use bpf target.
+ Otherwise, you can use bpf target. Additionally, you _must_ use bpf target
+ when:
+
+ - Your program uses data structures with pointer or long / unsigned long
+ types that interface with BPF helpers or context data structures. Access
+ into these structures is verified by the BPF verifier and may result
+ in verification failures if the native architecture is not aligned with
+ the BPF architecture, e.g. 64-bit. An example of this is
+ BPF_PROG_TYPE_SK_MSG require '-target bpf'
Happy BPF hacking!
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b7251541..abce27c 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1027,7 +1027,17 @@ emit_cond_jmp: /* convert BPF opcode to x86 */
break;
case BPF_JMP | BPF_JA:
- jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (insn->off == -1)
+ /* -1 jmp instructions will always jump
+ * backwards two bytes. Explicitly handling
+ * this case avoids wasting too many passes
+ * when there are long sequences of replaced
+ * dead code.
+ */
+ jmp_offset = -2;
+ else
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+
if (!jmp_offset)
/* optimize out nop jumps */
break;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 486e65e..469b20e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@ struct bpf_map_ops {
void (*map_release)(struct bpf_map *map, struct file *map_file);
void (*map_free)(struct bpf_map *map);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+ void (*map_release_uref)(struct bpf_map *map);
/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
@@ -351,6 +352,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
struct bpf_prog **_prog, *__prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
+ preempt_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
@@ -362,6 +364,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
} \
_out: \
rcu_read_unlock(); \
+ preempt_enable_no_resched(); \
_ret; \
})
@@ -434,7 +437,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
-void bpf_fd_array_map_clear(struct bpf_map *map);
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 14750e7..027107f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -476,7 +476,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
-void bpf_fd_array_map_clear(struct bpf_map *map)
+static void bpf_fd_array_map_clear(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
@@ -495,6 +495,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
+ .map_release_uref = bpf_fd_array_map_clear,
};
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index a3b2138..634415c 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -43,6 +43,7 @@
#include <net/tcp.h>
#include <linux/ptr_ring.h>
#include <net/inet_common.h>
+#include <linux/sched/signal.h>
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -523,8 +524,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
i = md->sg_start;
do {
- r->sg_data[i] = md->sg_data[i];
-
size = (apply && apply_bytes < md->sg_data[i].length) ?
apply_bytes : md->sg_data[i].length;
@@ -535,6 +534,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
}
sk_mem_charge(sk, size);
+ r->sg_data[i] = md->sg_data[i];
r->sg_data[i].length = size;
md->sg_data[i].length -= size;
md->sg_data[i].offset += size;
@@ -732,6 +732,26 @@ out_err:
return err;
}
+static int bpf_wait_data(struct sock *sk,
+ struct smap_psock *psk, int flags,
+ long timeo, int *err)
+{
+ int rc;
+
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ rc = sk_wait_event(sk, &timeo,
+ !list_empty(&psk->ingress) ||
+ !skb_queue_empty(&sk->sk_receive_queue),
+ &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+
+ return rc;
+}
+
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -755,6 +775,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
+bytes_ready:
while (copied != len) {
struct scatterlist *sg;
struct sk_msg_buff *md;
@@ -809,6 +830,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
}
+ if (!copied) {
+ long timeo;
+ int data;
+ int err = 0;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = bpf_wait_data(sk, psock, flags, timeo, &err);
+
+ if (data) {
+ if (!skb_queue_empty(&sk->sk_receive_queue)) {
+ release_sock(sk);
+ smap_release_sock(psock, sk);
+ copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ return copied;
+ }
+ goto bytes_ready;
+ }
+
+ if (err)
+ copied = err;
+ }
+
release_sock(sk);
smap_release_sock(psock, sk);
return copied;
@@ -1831,7 +1874,7 @@ static int sock_map_update_elem(struct bpf_map *map,
return err;
}
-static void sock_map_release(struct bpf_map *map, struct file *map_file)
+static void sock_map_release(struct bpf_map *map)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_prog *orig;
@@ -1855,7 +1898,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_get_next_key = sock_map_get_next_key,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem,
- .map_release = sock_map_release,
+ .map_release_uref = sock_map_release,
};
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4ca46df..ebfe9f2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -257,8 +257,8 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic_dec_and_test(&map->usercnt)) {
- if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
- bpf_fd_array_map_clear(map);
+ if (map->ops->map_release_uref)
+ map->ops->map_release_uref(map);
}
}
diff --git a/net/core/filter.c b/net/core/filter.c
index d31aff9..e77c30c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3240,6 +3240,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
skb_dst_set(skb, (struct dst_entry *) md);
info = &md->u.tun_info;
+ memset(info, 0, sizeof(*info));
info->mode = IP_TUNNEL_INFO_TX;
info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile
index 9bf2881..fa53f4d 100644
--- a/samples/sockmap/Makefile
+++ b/samples/sockmap/Makefile
@@ -65,11 +65,14 @@ $(src)/*.c: verify_target_bpf
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
+#
+# -target bpf option required with SK_MSG programs, this is to ensure
+# reading 'void *' data types for data and data_end are __u64 reads.
$(obj)/%.o: $(src)/%.c
$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
-Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
- -Wno-unknown-warning-option \
- -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+ -Wno-unknown-warning-option -O2 -target bpf \
+ -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 1ea5459..53b60ad 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le
$(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
clean: bpftool_clean
$(call QUIET_CLEAN, bpf-progs)
OpenPOWER on IntegriCloud