summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-03-01 14:05:24 -0500
committerDavid S. Miller <davem@davemloft.net>2015-03-01 14:05:24 -0500
commit68932f7188f84673fdb0de14a00c2f428869c6ed (patch)
treedd4de9c9faa662e188285fa5db20663859139f55 /net
parentb656cc64cf815d8ff6e99883a531fafc48199bd0 (diff)
parente2e9b6541dd4b31848079da80fe2253daaafb549 (diff)
downloadop-kernel-dev-68932f7188f84673fdb0de14a00c2f428869c6ed.zip
op-kernel-dev-68932f7188f84673fdb0de14a00c2f428869c6ed.tar.gz
Merge branch 'ebpf_support_for_cls_bpf'
Daniel Borkmann says: ==================== eBPF support for cls_bpf This is the non-RFC version of my patchset posted before netdev01 [1] conference. It contains a couple of eBPF cleanups and preparation patches to get eBPF support into cls_bpf. The last patch adds the actual support. I'll post the iproute2 parts after the kernel bits are merged, an initial preview link to the code is mentioned in the last patch. Patch 4 and 5 were originally one patch, but I've split them into two parts upon request as patch 4 only is also needed for Alexei's tracing patches that go via tip tree. Tested with tc and all in-kernel available BPF test suites. I have configured and built LLVM with --enable-experimental-targets=BPF but as Alexei put it, the plan is to get rid of the experimental status in future [2]. Thanks a lot! v1 -> v2: - Removed arch patches from this series - x86 is already queued in tip tree, under x86/mm - arm64 just reposted directly to arm folks - Rest is unchanged [1] http://thread.gmane.org/gmane.linux.network/350191 [2] http://article.gmane.org/gmane.linux.kernel/1874969 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/filter.c47
-rw-r--r--net/sched/cls_bpf.c206
2 files changed, 177 insertions, 76 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index f6bdc2b..ff000cb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -814,7 +814,7 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)
static void __bpf_prog_release(struct bpf_prog *prog)
{
- if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
+ if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
} else {
bpf_release_orig_filter(prog);
@@ -1093,7 +1093,6 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
-#ifdef CONFIG_BPF_SYSCALL
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
@@ -1106,8 +1105,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
- /* valid fd, but invalid program type */
+ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
return -EINVAL;
}
@@ -1117,8 +1115,8 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
bpf_prog_put(prog);
return -ENOMEM;
}
- fp->prog = prog;
+ fp->prog = prog;
atomic_set(&fp->refcnt, 0);
if (!sk_filter_charge(sk, fp)) {
@@ -1136,10 +1134,8 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
return 0;
}
-/* allow socket filters to call
- * bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem()
- */
-static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id)
+static const struct bpf_func_proto *
+sk_filter_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -1153,34 +1149,37 @@ static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func
}
}
-static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type)
+static bool sk_filter_is_valid_access(int off, int size,
+ enum bpf_access_type type)
{
/* skb fields cannot be accessed yet */
return false;
}
-static struct bpf_verifier_ops sock_filter_ops = {
- .get_func_proto = sock_filter_func_proto,
- .is_valid_access = sock_filter_is_valid_access,
+static const struct bpf_verifier_ops sk_filter_ops = {
+ .get_func_proto = sk_filter_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
};
-static struct bpf_prog_type_list tl = {
- .ops = &sock_filter_ops,
+static struct bpf_prog_type_list sk_filter_type __read_mostly = {
+ .ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
};
-static int __init register_sock_filter_ops(void)
+static struct bpf_prog_type_list sched_cls_type __read_mostly = {
+ .ops = &sk_filter_ops,
+ .type = BPF_PROG_TYPE_SCHED_CLS,
+};
+
+static int __init register_sk_filter_ops(void)
{
- bpf_register_prog_type(&tl);
+ bpf_register_prog_type(&sk_filter_type);
+ bpf_register_prog_type(&sched_cls_type);
+
return 0;
}
-late_initcall(register_sock_filter_ops);
-#else
-int sk_attach_bpf(u32 ufd, struct sock *sk)
-{
- return -EOPNOTSUPP;
-}
-#endif
+late_initcall(register_sk_filter_ops);
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5f3ee9e..6f7ed8f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -16,6 +16,8 @@
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
+#include <linux/bpf.h>
+
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
@@ -24,6 +26,8 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
MODULE_DESCRIPTION("TC BPF based classifier");
+#define CLS_BPF_NAME_LEN 256
+
struct cls_bpf_head {
struct list_head plist;
u32 hgen;
@@ -32,18 +36,24 @@ struct cls_bpf_head {
struct cls_bpf_prog {
struct bpf_prog *filter;
- struct sock_filter *bpf_ops;
- struct tcf_exts exts;
- struct tcf_result res;
struct list_head link;
+ struct tcf_result res;
+ struct tcf_exts exts;
u32 handle;
- u16 bpf_num_ops;
+ union {
+ u32 bpf_fd;
+ u16 bpf_num_ops;
+ };
+ struct sock_filter *bpf_ops;
+ const char *bpf_name;
struct tcf_proto *tp;
struct rcu_head rcu;
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_FD] = { .type = NLA_U32 },
+ [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
[TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
[TCA_BPF_OPS] = { .type = NLA_BINARY,
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
@@ -76,6 +86,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
+static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
+{
+ return !prog->bpf_ops;
+}
+
static int cls_bpf_init(struct tcf_proto *tp)
{
struct cls_bpf_head *head;
@@ -94,8 +109,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
tcf_exts_destroy(&prog->exts);
- bpf_prog_destroy(prog->filter);
+ if (cls_bpf_is_ebpf(prog))
+ bpf_prog_put(prog->filter);
+ else
+ bpf_prog_destroy(prog->filter);
+ kfree(prog->bpf_name);
kfree(prog->bpf_ops);
kfree(prog);
}
@@ -114,6 +133,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+
return 0;
}
@@ -151,69 +171,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
return ret;
}
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr)
+static int cls_bpf_prog_from_ops(struct nlattr **tb,
+ struct cls_bpf_prog *prog, u32 classid)
{
struct sock_filter *bpf_ops;
- struct tcf_exts exts;
- struct sock_fprog_kern tmp;
+ struct sock_fprog_kern fprog_tmp;
struct bpf_prog *fp;
u16 bpf_size, bpf_num_ops;
- u32 classid;
int ret;
- if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
- return -EINVAL;
-
- tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
- ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
- if (ret < 0)
- return ret;
-
- classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
- if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
+ return -EINVAL;
bpf_size = bpf_num_ops * sizeof(*bpf_ops);
- if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
- ret = -EINVAL;
- goto errout;
- }
+ if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
+ return -EINVAL;
bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
- if (bpf_ops == NULL) {
- ret = -ENOMEM;
- goto errout;
- }
+ if (bpf_ops == NULL)
+ return -ENOMEM;
memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
- tmp.len = bpf_num_ops;
- tmp.filter = bpf_ops;
+ fprog_tmp.len = bpf_num_ops;
+ fprog_tmp.filter = bpf_ops;
- ret = bpf_prog_create(&fp, &tmp);
- if (ret)
- goto errout_free;
+ ret = bpf_prog_create(&fp, &fprog_tmp);
+ if (ret < 0) {
+ kfree(bpf_ops);
+ return ret;
+ }
- prog->bpf_num_ops = bpf_num_ops;
prog->bpf_ops = bpf_ops;
+ prog->bpf_num_ops = bpf_num_ops;
+ prog->bpf_name = NULL;
+
prog->filter = fp;
prog->res.classid = classid;
+ return 0;
+}
+
+static int cls_bpf_prog_from_efd(struct nlattr **tb,
+ struct cls_bpf_prog *prog, u32 classid)
+{
+ struct bpf_prog *fp;
+ char *name = NULL;
+ u32 bpf_fd;
+
+ bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
+
+ fp = bpf_prog_get(bpf_fd);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
+ if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
+ bpf_prog_put(fp);
+ return -EINVAL;
+ }
+
+ if (tb[TCA_BPF_NAME]) {
+ name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
+ nla_len(tb[TCA_BPF_NAME]),
+ GFP_KERNEL);
+ if (!name) {
+ bpf_prog_put(fp);
+ return -ENOMEM;
+ }
+ }
+
+ prog->bpf_ops = NULL;
+ prog->bpf_fd = bpf_fd;
+ prog->bpf_name = name;
+
+ prog->filter = fp;
+ prog->res.classid = classid;
+
+ return 0;
+}
+
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+ struct cls_bpf_prog *prog,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct tcf_exts exts;
+ bool is_bpf, is_ebpf;
+ u32 classid;
+ int ret;
+
+ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+ is_ebpf = tb[TCA_BPF_FD];
+
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+ !tb[TCA_BPF_CLASSID])
+ return -EINVAL;
+
+ tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+ if (ret < 0)
+ return ret;
+
+ classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
+ cls_bpf_prog_from_efd(tb, prog, classid);
+ if (ret < 0) {
+ tcf_exts_destroy(&exts);
+ return ret;
+ }
+
tcf_bind_filter(tp, &prog->res, base);
tcf_exts_change(tp, &prog->exts, &exts);
return 0;
-errout_free:
- kfree(bpf_ops);
-errout:
- tcf_exts_destroy(&exts);
- return ret;
}
static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -297,11 +369,43 @@ errout:
return ret;
}
+static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
+ struct sk_buff *skb)
+{
+ struct nlattr *nla;
+
+ if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
+ return -EMSGSIZE;
+
+ nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
+ sizeof(struct sock_filter));
+ if (nla == NULL)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+ return 0;
+}
+
+static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
+ struct sk_buff *skb)
+{
+ if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
+ return -EMSGSIZE;
+
+ if (prog->bpf_name &&
+ nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *tm)
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
- struct nlattr *nest, *nla;
+ struct nlattr *nest;
+ int ret;
if (prog == NULL)
return skb->len;
@@ -314,16 +418,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
goto nla_put_failure;
- if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
- goto nla_put_failure;
- nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
- sizeof(struct sock_filter));
- if (nla == NULL)
+ if (cls_bpf_is_ebpf(prog))
+ ret = cls_bpf_dump_ebpf_info(prog, skb);
+ else
+ ret = cls_bpf_dump_bpf_info(prog, skb);
+ if (ret)
goto nla_put_failure;
- memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
-
if (tcf_exts_dump(skb, &prog->exts) < 0)
goto nla_put_failure;
OpenPOWER on IntegriCloud