From f13f2aeed154da8e48f90b85e720f8ba39b1e881 Mon Sep 17 00:00:00 2001 From: Philip Whineray Date: Sun, 22 Nov 2015 11:35:07 +0000 Subject: netfilter: Set /proc/net entries owner to root in namespace Various files are owned by root with 0440 permission. Reading them is impossible in an unprivileged user namespace, interfering with firewall tools. For instance, iptables-save relies on /proc/net/ip_tables_names contents to dump only loaded tables. This patch assigned ownership of the following files to root in the current namespace: - /proc/net/*_tables_names - /proc/net/*_tables_matches - /proc/net/*_tables_targets - /proc/net/nf_conntrack - /proc/net/nf_conntrack_expect - /proc/net/netfilter/nfnetlink_log A mapping for root must be available, so this order should be followed: unshare(CLONE_NEWUSER); /* Setup the mapping */ unshare(CLONE_NEWNET); Signed-off-by: Philip Whineray Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 7 +++++++ net/netfilter/nf_conntrack_standalone.c | 7 +++++++ net/netfilter/nfnetlink_log.c | 15 +++++++++++++-- net/netfilter/x_tables.c | 12 ++++++++++++ 4 files changed, 39 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index acf5c7b..278927a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -596,11 +596,18 @@ static int exp_proc_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_PROCFS struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; proc = proc_create("nf_conntrack_expect", 0440, net->proc_net, &exp_file_ops); if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif /* CONFIG_NF_CONNTRACK_PROCFS */ return 0; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 1fb3cac..0f1a45b 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -392,11 +392,18 @@ static const struct file_operations ct_cpu_seq_fops = { static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; + kuid_t root_uid; + kgid_t root_gid; pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops); if (!pde) goto out_nf_conntrack; + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(pde, root_uid, root_gid); + pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, &ct_cpu_seq_fops); if (!pde) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 740cce4..dea4676 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1064,15 +1064,26 @@ static int __net_init nfnl_log_net_init(struct net *net) { unsigned int i; struct nfnl_log_net *log = nfnl_log_pernet(net); +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; +#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&log->instance_table[i]); spin_lock_init(&log->instances_lock); #ifdef CONFIG_PROC_FS - if (!proc_create("nfnetlink_log", 0440, - net->nf.proc_netfilter, &nful_file_ops)) + proc = proc_create("nfnetlink_log", 0440, + net->nf.proc_netfilter, &nful_file_ops); + if (!proc) return -ENOMEM; + + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d4aaad7..c8a0b7d 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1226,6 +1227,8 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; struct proc_dir_entry *proc; + kuid_t root_uid; + kgid_t root_gid; #endif if (af >= ARRAY_SIZE(xt_prefix)) @@ -1233,12 +1236,17 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS + root_uid = make_kuid(net->user_ns, 0); + root_gid = make_kgid(net->user_ns, 0); + strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops, (void *)(unsigned long)af); if (!proc) goto out; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); @@ -1246,6 +1254,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_tables; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); @@ -1253,6 +1263,8 @@ int xt_proto_init(struct net *net, u_int8_t af) (void *)(unsigned long)af); if (!proc) goto out_remove_matches; + if (uid_valid(root_uid) && gid_valid(root_gid)) + proc_set_user(proc, root_uid, root_gid); #endif return 0; -- cgit v1.1 From 7ec3f7b47b8d9ad7ba425726f2c58f9ddce040df Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 24 Nov 2015 10:00:22 +0000 Subject: netfilter: nft_payload: add packet mangling support Add support for mangling packet payload. Checksum for the specified base header is updated automatically if requested, however no updates for any kind of pseudo headers are supported, meaning no stateless NAT is supported. For checksum updates different checksumming methods can be specified. The currently supported methods are NONE for no checksum updates, and INET for internet type checksums. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_payload.c | 135 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 09b4b07..12cd4bf 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -107,10 +107,13 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, @@ -160,6 +163,118 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static void nft_payload_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + struct sk_buff *skb = pkt->skb; + const u32 *src = ®s->data[priv->sreg]; + int offset, csum_offset; + __wsum fsum, tsum; + __sum16 sum; + + switch (priv->base) { + case NFT_PAYLOAD_LL_HEADER: + if (!skb_mac_header_was_set(skb)) + goto err; + offset = skb_mac_header(skb) - skb->data; + break; + case NFT_PAYLOAD_NETWORK_HEADER: + offset = skb_network_offset(skb); + break; + case NFT_PAYLOAD_TRANSPORT_HEADER: + offset = pkt->xt.thoff; + break; + default: + BUG(); + } + + csum_offset = offset + priv->csum_offset; + offset += priv->offset; + + if (priv->csum_type == NFT_PAYLOAD_CSUM_INET && + (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER || + skb->ip_summed != CHECKSUM_PARTIAL)) { + if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + + fsum = skb_checksum(skb, offset, priv->len, 0); + tsum = csum_partial(src, priv->len, 0); + sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), + tsum)); + if (sum == 0) + sum = CSUM_MANGLED_0; + + if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || + skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) + goto err; + } + + if (!skb_make_writable(skb, max(offset + priv->len, 0)) || + skb_store_bits(skb, offset, src, priv->len) < 0) + goto err; + + return; +err: + regs->verdict.code = NFT_BREAK; +} + +static int nft_payload_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_payload_set *priv = nft_expr_priv(expr); + + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); + priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); + priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); + + if (tb[NFTA_PAYLOAD_CSUM_TYPE]) + priv->csum_type = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE])); + if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) + priv->csum_offset = + ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + + switch (priv->csum_type) { + case NFT_PAYLOAD_CSUM_NONE: + case NFT_PAYLOAD_CSUM_INET: + break; + default: + return -EOPNOTSUPP; + } + + return nft_validate_register_load(priv->sreg, priv->len); +} + +static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_payload_set *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) || + nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || + nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, + htonl(priv->csum_offset))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_expr_ops nft_payload_set_ops = { + .type = &nft_payload_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), + .eval = nft_payload_set_eval, + .init = nft_payload_set_init, + .dump = nft_payload_set_dump, +}; + static const struct nft_expr_ops * nft_payload_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -167,8 +282,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, enum nft_payload_bases base; unsigned int offset, len; - if (tb[NFTA_PAYLOAD_DREG] == NULL || - tb[NFTA_PAYLOAD_BASE] == NULL || + if (tb[NFTA_PAYLOAD_BASE] == NULL || tb[NFTA_PAYLOAD_OFFSET] == NULL || tb[NFTA_PAYLOAD_LEN] == NULL) return ERR_PTR(-EINVAL); @@ -183,6 +297,15 @@ nft_payload_select_ops(const struct nft_ctx *ctx, return ERR_PTR(-EOPNOTSUPP); } + if (tb[NFTA_PAYLOAD_SREG] != NULL) { + if (tb[NFTA_PAYLOAD_DREG] != NULL) + return ERR_PTR(-EINVAL); + return &nft_payload_set_ops; + } + + if (tb[NFTA_PAYLOAD_DREG] == NULL) + return ERR_PTR(-EINVAL); + offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); -- cgit v1.1 From 33d5a7b14bfd02e60af9d223db8dfff0cbcabe6b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:04 +0100 Subject: netfilter: nf_tables: extend tracing infrastructure nft monitor mode can then decode and display this trace data. Parts of LL/Network/Transport headers are provided as separate attributes. Otherwise, printing IP address data becomes virtually impossible for userspace since in the case of the netdev family we really don't want userspace to have to know all the possible link layer types and/or sizes just to display/print an ip address. We also don't want userspace to have to follow ipv6 header chains to get the s/dport info, the kernel already did this work for us. To avoid bloating nft_do_chain all data required for tracing is encapsulated in nft_traceinfo. The structure is initialized unconditionally(!) for each nft_do_chain invocation. This unconditionall call will be moved under a static key in a followup patch. With lots of help from Patrick McHardy and Pablo Neira. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 12 +- net/netfilter/nf_tables_core.c | 45 +++++-- net/netfilter/nf_tables_trace.c | 271 ++++++++++++++++++++++++++++++++++++++++ net/netfilter/nfnetlink.c | 1 + 5 files changed, 312 insertions(+), 19 deletions(-) create mode 100644 net/netfilter/nf_tables_trace.c (limited to 'net/netfilter') diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7638c36..2293484 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables -nf_tables-objs += nf_tables_core.o nf_tables_api.o +nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 93cc473..c4969a0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4446,22 +4446,22 @@ static void nft_verdict_uninit(const struct nft_data *data) } } -static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) +int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) { struct nlattr *nest; - nest = nla_nest_start(skb, NFTA_DATA_VERDICT); + nest = nla_nest_start(skb, type); if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code))) goto nla_put_failure; - switch (data->verdict.code) { + switch (v->code) { case NFT_JUMP: case NFT_GOTO: if (nla_put_string(skb, NFTA_VERDICT_CHAIN, - data->verdict.chain->name)) + v->chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4572,7 +4572,7 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, err = nft_value_dump(skb, data, len); break; case NFT_DATA_VERDICT: - err = nft_verdict_dump(skb, data); + err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict); break; default: err = -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f3695a4..2395de7 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -44,22 +44,36 @@ static struct nf_loginfo trace_loginfo = { }, }; -static void __nft_trace_packet(const struct nft_pktinfo *pkt, - const struct nft_chain *chain, - int rulenum, enum nft_trace type) +static noinline void __nft_trace_packet(struct nft_traceinfo *info, + const struct nft_chain *chain, + int rulenum, enum nft_trace type) { + const struct nft_pktinfo *pkt = info->pkt; + + if (!pkt->skb->nf_trace) + return; + + info->chain = chain; + info->type = type; + + nft_trace_notify(info); + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); } -static inline void nft_trace_packet(const struct nft_pktinfo *pkt, +static inline void nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + const struct nft_rule *rule, + int rulenum, + enum nft_trace_types type) { - if (unlikely(pkt->skb->nf_trace)) - __nft_trace_packet(pkt, chain, rulenum, type); + if (unlikely(info->trace)) { + info->rule = rule; + __nft_trace_packet(info, chain, rulenum, type); + } } static void nft_cmp_fast_eval(const struct nft_expr *expr, @@ -121,7 +135,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); + struct nft_traceinfo info; + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); @@ -151,7 +167,8 @@ next_rule: regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); continue; } break; @@ -161,7 +178,8 @@ next_rule: case NF_ACCEPT: case NF_DROP: case NF_QUEUE: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); return regs.verdict.code; } @@ -174,7 +192,8 @@ next_rule: stackptr++; /* fall through */ case NFT_GOTO: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RULE); chain = regs.verdict.chain; goto do_chain; @@ -182,7 +201,8 @@ next_rule: rulenum++; /* fall through */ case NFT_RETURN: - nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); + nft_trace_packet(&info, chain, rule, + rulenum, NFT_TRACETYPE_RETURN); break; default: WARN_ON(1); @@ -196,7 +216,8 @@ next_rule: goto next_rule; } - nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); + nft_trace_packet(&info, basechain, NULL, -1, + NFT_TRACETYPE_POLICY); rcu_read_lock_bh(); stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c new file mode 100644 index 0000000..36fd7ad --- /dev/null +++ b/net/netfilter/nf_tables_trace.c @@ -0,0 +1,271 @@ +/* + * (C) 2015 Red Hat GmbH + * Author: Florian Westphal + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFT_TRACETYPE_LL_HSIZE 20 +#define NFT_TRACETYPE_NETWORK_HSIZE 40 +#define NFT_TRACETYPE_TRANSPORT_HSIZE 20 + +static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) +{ + __be32 id; + + /* using skb address as ID results in a limited number of + * values (and quick reuse). + * + * So we attempt to use as many skb members that will not + * change while skb is with netfilter. + */ + id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), + skb->skb_iif); + + return nla_put_be32(nlskb, NFTA_TRACE_ID, id); +} + +static int trace_fill_header(struct sk_buff *nlskb, u16 type, + const struct sk_buff *skb, + int off, unsigned int len) +{ + struct nlattr *nla; + + if (len == 0) + return 0; + + nla = nla_reserve(nlskb, type, len); + if (!nla || skb_copy_bits(skb, off, nla_data(nla), len)) + return -1; + + return 0; +} + +static int nf_trace_fill_ll_header(struct sk_buff *nlskb, + const struct sk_buff *skb) +{ + struct vlan_ethhdr veth; + int off; + + BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE); + + off = skb_mac_header(skb) - skb->data; + if (off != -ETH_HLEN) + return -1; + + if (skb_copy_bits(skb, off, &veth, ETH_HLEN)) + return -1; + + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth); +} + +static int nf_trace_fill_dev_info(struct sk_buff *nlskb, + const struct net_device *indev, + const struct net_device *outdev) +{ + if (indev) { + if (nla_put_be32(nlskb, NFTA_TRACE_IIF, + htonl(indev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE, + htons(indev->type))) + return -1; + } + + if (outdev) { + if (nla_put_be32(nlskb, NFTA_TRACE_OIF, + htonl(outdev->ifindex))) + return -1; + + if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE, + htons(outdev->type))) + return -1; + } + + return 0; +} + +static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, + const struct nft_pktinfo *pkt) +{ + const struct sk_buff *skb = pkt->skb; + unsigned int len = min_t(unsigned int, + pkt->xt.thoff - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); + int off = skb_network_offset(skb); + + if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) + return -1; + + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_get(skb)) + return nf_trace_fill_ll_header(nlskb, skb); + + off = skb_mac_header(skb) - skb->data; + len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE); + return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER, + skb, off, len); +} + +static int nf_trace_fill_rule_info(struct sk_buff *nlskb, + const struct nft_traceinfo *info) +{ + if (!info->rule) + return 0; + + /* a continue verdict with ->type == RETURN means that this is + * an implicit return (end of chain reached). + * + * Since no rule matched, the ->rule pointer is invalid. + */ + if (info->type == NFT_TRACETYPE_RETURN && + info->verdict->code == NFT_CONTINUE) + return 0; + + return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE, + cpu_to_be64(info->rule->handle)); +} + +void nft_trace_notify(struct nft_traceinfo *info) +{ + const struct nft_pktinfo *pkt = info->pkt; + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + struct sk_buff *skb; + unsigned int size; + int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE; + + if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE)) + return; + + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + + nla_total_size(NFT_TABLE_MAXNAMELEN) + + nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(sizeof(__be64)) + /* rule handle */ + nla_total_size(sizeof(__be32)) + /* trace type */ + nla_total_size(0) + /* VERDICT, nested */ + nla_total_size(sizeof(u32)) + /* verdict code */ + nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ + nla_total_size(sizeof(u32)) + /* id */ + nla_total_size(NFT_TRACETYPE_LL_HSIZE) + + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + + nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) + + nla_total_size(sizeof(u32)) + /* iif */ + nla_total_size(sizeof(__be16)) + /* iiftype */ + nla_total_size(sizeof(u32)) + /* oif */ + nla_total_size(sizeof(__be16)) + /* oiftype */ + nla_total_size(sizeof(u32)) + /* mark */ + nla_total_size(sizeof(u32)) + /* nfproto */ + nla_total_size(sizeof(u32)); /* policy */ + + skb = nlmsg_new(size, GFP_ATOMIC); + if (!skb) + return; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0); + if (!nlh) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = info->basechain->type->family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) + goto nla_put_failure; + + if (trace_fill_id(skb, pkt->skb)) + goto nla_put_failure; + + if (info->chain) { + if (nla_put_string(skb, NFTA_TRACE_CHAIN, + info->chain->name)) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_TRACE_TABLE, + info->chain->table->name)) + goto nla_put_failure; + } + + if (nf_trace_fill_rule_info(skb, info)) + goto nla_put_failure; + + switch (info->type) { + case NFT_TRACETYPE_UNSPEC: + case __NFT_TRACETYPE_MAX: + break; + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) + goto nla_put_failure; + break; + case NFT_TRACETYPE_POLICY: + if (nla_put_be32(skb, NFTA_TRACE_POLICY, + info->basechain->policy)) + goto nla_put_failure; + break; + } + + if (pkt->skb->mark && + nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + goto nla_put_failure; + + if (!info->packet_dumped) { + if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out)) + goto nla_put_failure; + + if (nf_trace_fill_pkt_info(skb, pkt)) + goto nla_put_failure; + info->packet_dumped = true; + } + + nlmsg_end(skb, nlh); + nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC); + return; + + nla_put_failure: + WARN_ON_ONCE(1); + kfree_skb(skb); +} + +void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, + const struct nft_verdict *verdict, + const struct nft_chain *chain) +{ + info->basechain = nft_base_chain(chain); + info->trace = true; + info->packet_dumped = false; + info->pkt = pkt; + info->verdict = verdict; +} diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 46453ab..28591fa 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -49,6 +49,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, + [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, }; void nfnl_lock(__u8 subsys_id) -- cgit v1.1 From e639f7ab079b5256660018511d87aa34b54f1a9d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Nov 2015 21:53:05 +0100 Subject: netfilter: nf_tables: wrap tracing with a static key Only needed when meta nftrace rule(s) were added. The assumption is that no such rules are active, so the call to nft_trace_init is "never" needed. When nftrace rules are active, we always call the nft_trace_* functions, but will only send netlink messages when all of the following are true: - traceinfo structure was initialised - skb->nf_trace == 1 - at least one subscriber to trace group. Adding an extra conditional (static_branch ... && skb->nf_trace) nft_trace_init( ..) Is possible but results in a larger nft_do_chain footprint. Signed-off-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 9 ++++++--- net/netfilter/nf_tables_trace.c | 4 ++++ net/netfilter/nft_meta.c | 16 ++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 2395de7..67fa41d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +51,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, { const struct nft_pktinfo *pkt = info->pkt; - if (!pkt->skb->nf_trace) + if (!info->trace || !pkt->skb->nf_trace) return; info->chain = chain; @@ -70,7 +71,7 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, int rulenum, enum nft_trace_types type) { - if (unlikely(info->trace)) { + if (static_branch_unlikely(&nft_trace_enabled)) { info->rule = rule; __nft_trace_packet(info, chain, rulenum, type); } @@ -137,7 +138,9 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; - nft_trace_init(&info, pkt, ®s.verdict, basechain); + info.trace = false; + if (static_branch_unlikely(&nft_trace_enabled)) + nft_trace_init(&info, pkt, ®s.verdict, basechain); do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 36fd7ad..e9e959f 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,9 @@ #define NFT_TRACETYPE_NETWORK_HSIZE 40 #define NFT_TRACETYPE_TRANSPORT_HSIZE 20 +DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); +EXPORT_SYMBOL_GPL(nft_trace_enabled); + static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) { __be32 id; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 9dfaf4d..85a465b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include /* for TCP_TIME_WAIT */ #include +#include #include void nft_meta_get_eval(const struct nft_expr *expr, @@ -297,6 +299,9 @@ int nft_meta_set_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_META_NFTRACE) + static_branch_inc(&nft_trace_enabled); + return 0; } EXPORT_SYMBOL_GPL(nft_meta_set_init); @@ -334,6 +339,16 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_meta_set_dump); +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_meta *priv = nft_expr_priv(expr); + + if (priv->key == NFT_META_NFTRACE) + static_branch_dec(&nft_trace_enabled); +} +EXPORT_SYMBOL_GPL(nft_meta_set_destroy); + static struct nft_expr_type nft_meta_type; static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, @@ -348,6 +363,7 @@ static const struct nft_expr_ops nft_meta_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_set_eval, .init = nft_meta_set_init, + .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, }; -- cgit v1.1 From 23509fcd4ec5eadcca7a958b354f79dedc2765cc Mon Sep 17 00:00:00 2001 From: "Rosen, Rami" Date: Tue, 8 Dec 2015 07:09:24 -0500 Subject: netfilter: nfnetlink_log: Change setter functions to be void Change return type of nfulnl_set_timeout() and nfulnl_set_qthresh() to be void. This patch changes the return type of the static methods nfulnl_set_timeout() and nfulnl_set_qthresh() to be void, as there is no justification and no need for these methods to return int. Signed-off-by: Rami Rosen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index dea4676..70b6bd3 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -293,24 +293,20 @@ nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) return status; } -static int +static void nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout) { spin_lock_bh(&inst->lock); inst->flushtimeout = timeout; spin_unlock_bh(&inst->lock); - - return 0; } -static int +static void nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh) { spin_lock_bh(&inst->lock); inst->qthreshold = qthresh; spin_unlock_bh(&inst->lock); - - return 0; } static int -- cgit v1.1 From 9fb0b519c7e094e741a3fc3fd4d854a8bc74b6dc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Dec 2015 16:31:21 +0100 Subject: netfilter: nf_tables: fix nf_log_trace based tracing nf_log_trace() outputs bogus 'TRACE:' strings because I forgot to update the comments array. Fixes: 33d5a7b14bfd0 ("netfilter: nf_tables: extend tracing infrastructure") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 67fa41d..e9f8dff 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -23,16 +23,10 @@ #include #include -enum nft_trace { - NFT_TRACE_RULE, - NFT_TRACE_RETURN, - NFT_TRACE_POLICY, -}; - -static const char *const comments[] = { - [NFT_TRACE_RULE] = "rule", - [NFT_TRACE_RETURN] = "return", - [NFT_TRACE_POLICY] = "policy", +static const char *const comments[__NFT_TRACETYPE_MAX] = { + [NFT_TRACETYPE_POLICY] = "policy", + [NFT_TRACETYPE_RETURN] = "return", + [NFT_TRACETYPE_RULE] = "rule", }; static struct nf_loginfo trace_loginfo = { @@ -47,7 +41,7 @@ static struct nf_loginfo trace_loginfo = { static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, - int rulenum, enum nft_trace type) + int rulenum, enum nft_trace_types type) { const struct nft_pktinfo *pkt = info->pkt; -- cgit v1.1 From 19576c9478682a398276c994ea0d2696474df32b Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 9 Dec 2015 14:07:40 +0100 Subject: netfilter: cttimeout: add netns support Add a per-netns list of timeout objects and adjust code to use it. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_timeout.c | 2 +- net/netfilter/nfnetlink_cttimeout.c | 82 ++++++++++++++++++++++-------------- net/netfilter/xt_CT.c | 2 +- 3 files changed, 53 insertions(+), 33 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 93da609..26e7420 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -25,7 +25,7 @@ #include struct ctnl_timeout * -(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly; +(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook); void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index c7a2d0e..3921d54 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -38,8 +38,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); -static LIST_HEAD(cttimeout_list); - static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, @@ -90,7 +88,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); - list_for_each_entry(timeout, &cttimeout_list, head) { + list_for_each_entry(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -145,7 +143,7 @@ cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, timeout->l3num = l3num; timeout->l4proto = l4proto; atomic_set(&timeout->refcnt, 1); - list_add_tail_rcu(&timeout->head, &cttimeout_list); + list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list); return 0; err: @@ -209,6 +207,7 @@ nla_put_failure: static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) @@ -219,7 +218,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &cttimeout_list, head) { + list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) { if (last) { if (cur != last) continue; @@ -245,6 +244,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; @@ -260,7 +260,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) @@ -301,17 +301,17 @@ static void untimeout(struct nf_conntrack_tuple_hash *i, RCU_INIT_POINTER(timeout_ext->timeout, NULL); } -static void ctnl_untimeout(struct ctnl_timeout *timeout) +static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; int i; local_bh_disable(); - for (i = 0; i < init_net.ct.htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < init_net.ct.htable_size) { - hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode) + if (i < net->ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) untimeout(h, timeout); } spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -320,7 +320,7 @@ static void ctnl_untimeout(struct ctnl_timeout *timeout) } /* try to delete object, fail if it is still in use. */ -static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; @@ -329,7 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); - ctnl_untimeout(timeout); + ctnl_untimeout(net, timeout); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -344,23 +344,24 @@ cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(skb->sk); char *name; struct ctnl_timeout *cur; int ret = -ENOENT; if (!cda[CTA_TIMEOUT_NAME]) { - list_for_each_entry(cur, &cttimeout_list, head) - ctnl_timeout_try_del(cur); + list_for_each_entry(cur, &net->nfct_timeout_list, head) + ctnl_timeout_try_del(net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); - list_for_each_entry(cur, &cttimeout_list, head) { + list_for_each_entry(cur, &net->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; - ret = ctnl_timeout_try_del(cur); + ret = ctnl_timeout_try_del(net, cur); if (ret < 0) return ret; @@ -511,12 +512,13 @@ err: } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT -static struct ctnl_timeout *ctnl_timeout_find_get(const char *name) +static struct ctnl_timeout * +ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; rcu_read_lock(); - list_for_each_entry_rcu(timeout, &cttimeout_list, head) { + list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -569,10 +571,39 @@ static const struct nfnetlink_subsystem cttimeout_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); +static int __net_init cttimeout_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfct_timeout_list); + + return 0; +} + +static void __net_exit cttimeout_net_exit(struct net *net) +{ + struct ctnl_timeout *cur, *tmp; + + ctnl_untimeout(net, NULL); + + list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { + list_del_rcu(&cur->head); + nf_ct_l4proto_put(cur->l4proto); + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations cttimeout_ops = { + .init = cttimeout_net_init, + .exit = cttimeout_net_exit, +}; + static int __init cttimeout_init(void) { int ret; + ret = register_pernet_subsys(&cttimeout_ops); + if (ret < 0) + return ret; + ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " @@ -586,28 +617,17 @@ static int __init cttimeout_init(void) return 0; err_out: + unregister_pernet_subsys(&cttimeout_ops); return ret; } static void __exit cttimeout_exit(void) { - struct ctnl_timeout *cur, *tmp; - pr_info("cttimeout: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&cttimeout_subsys); - /* Make sure no conntrack objects refer to custom timeouts anymore. */ - ctnl_untimeout(NULL); - - list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. - */ - nf_ct_l4proto_put(cur->l4proto); - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&cttimeout_ops); #ifdef CONFIG_NF_CONNTRACK_TIMEOUT RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index e7ac07e..6669e68 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -143,7 +143,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, goto out; } - timeout = timeout_find_get(timeout_name); + timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; pr_info("No such timeout policy \"%s\"\n", timeout_name); -- cgit v1.1 From 4ec8ff0edccffe7a77f18e2a1e2ce86f03e08b5c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:54 -0500 Subject: netfilter: prepare xt_cgroup for multi revisions xt_cgroup will grow cgroup2 path based match. Postfix existing symbols with _v0 and prepare for multi revision registration. Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Cc: Jan Engelhardt Cc: Pablo Neira Ayuso Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 54eaeb4..1730025 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: process control group matching"); MODULE_ALIAS("ipt_cgroup"); MODULE_ALIAS("ip6t_cgroup"); -static int cgroup_mt_check(const struct xt_mtchk_param *par) +static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) { - struct xt_cgroup_info *info = par->matchinfo; + struct xt_cgroup_info_v0 *info = par->matchinfo; if (info->invert & ~1) return -EINVAL; @@ -35,9 +35,9 @@ static int cgroup_mt_check(const struct xt_mtchk_param *par) } static bool -cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) +cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - const struct xt_cgroup_info *info = par->matchinfo; + const struct xt_cgroup_info_v0 *info = par->matchinfo; if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; @@ -46,27 +46,29 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) info->invert; } -static struct xt_match cgroup_mt_reg __read_mostly = { - .name = "cgroup", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = cgroup_mt_check, - .match = cgroup_mt, - .matchsize = sizeof(struct xt_cgroup_info), - .me = THIS_MODULE, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_IN), +static struct xt_match cgroup_mt_reg[] __read_mostly = { + { + .name = "cgroup", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v0, + .match = cgroup_mt_v0, + .matchsize = sizeof(struct xt_cgroup_info_v0), + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) { - return xt_register_match(&cgroup_mt_reg); + return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } static void __exit cgroup_mt_exit(void) { - xt_unregister_match(&cgroup_mt_reg); + xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg)); } module_init(cgroup_mt_init); -- cgit v1.1 From c38c4597e4bf3e99860eac98211748e1ecb0e139 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 7 Dec 2015 17:38:55 -0500 Subject: netfilter: implement xt_cgroup cgroup2 path match This patch implements xt_cgroup path match which matches cgroup2 membership of the associated socket. The match is recursive and invertible. For rationales on introducing another cgroup based match, please refer to a preceding commit "sock, cgroup: add sock->sk_cgroup". v3: Folded into xt_cgroup as a new revision interface as suggested by Pablo. v2: Included linux/limits.h from xt_cgroup2.h for PATH_MAX. Added explicit alignment to the priv field. Both suggested by Jan. Signed-off-by: Tejun Heo Cc: Daniel Borkmann Cc: Daniel Wagner CC: Neil Horman Cc: Jan Engelhardt Cc: Pablo Neira Ayuso Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_cgroup.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 1730025..a086a91 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -34,6 +34,37 @@ static int cgroup_mt_check_v0(const struct xt_mtchk_param *par) return 0; } +static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + struct cgroup *cgrp; + + if ((info->invert_path & ~1) || (info->invert_classid & ~1)) + return -EINVAL; + + if (!info->has_path && !info->has_classid) { + pr_info("xt_cgroup: no path or classid specified\n"); + return -EINVAL; + } + + if (info->has_path && info->has_classid) { + pr_info("xt_cgroup: both path and classid specified\n"); + return -EINVAL; + } + + if (info->has_path) { + cgrp = cgroup_get_from_path(info->path); + if (IS_ERR(cgrp)) { + pr_info("xt_cgroup: invalid path, errno=%ld\n", + PTR_ERR(cgrp)); + return -EINVAL; + } + info->priv = cgrp; + } + + return 0; +} + static bool cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { @@ -46,6 +77,31 @@ cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) info->invert; } +static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info_v1 *info = par->matchinfo; + struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data; + struct cgroup *ancestor = info->priv; + + if (!skb->sk || !sk_fullsock(skb->sk)) + return false; + + if (ancestor) + return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ + info->invert_path; + else + return (info->classid == sock_cgroup_classid(skcd)) ^ + info->invert_classid; +} + +static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_cgroup_info_v1 *info = par->matchinfo; + + if (info->priv) + cgroup_put(info->priv); +} + static struct xt_match cgroup_mt_reg[] __read_mostly = { { .name = "cgroup", @@ -59,6 +115,19 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = { (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), }, + { + .name = "cgroup", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v1, + .match = cgroup_mt_v1, + .matchsize = sizeof(struct xt_cgroup_info_v1), + .destroy = cgroup_mt_destroy_v1, + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) -- cgit v1.1 From 9c55d3b5459bffe8ac098175e672a90260c0cfa4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 3 Dec 2015 10:49:42 +0100 Subject: nfnetlink: add nfnl_dereference_protected helper to avoid overly long line in followup patch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 28591fa..aebf5cd 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -33,6 +33,10 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); +#define nfnl_dereference_protected(id) \ + rcu_dereference_protected(table[(id)].subsys, \ + lockdep_nfnl_is_held((id))) + static char __initdata nfversion[] = "0.30"; static struct { @@ -208,8 +212,7 @@ replay: } else { rcu_read_unlock(); nfnl_lock(subsys_id); - if (rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)) != ss || + if (nfnl_dereference_protected(subsys_id) != ss || nfnetlink_find_client(type, ss) != nc) err = -EAGAIN; else if (nc->call) @@ -299,15 +302,13 @@ replay: skb->sk = oskb->sk; nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) { #ifdef CONFIG_MODULES nfnl_unlock(subsys_id); request_module("nfnetlink-subsys-%d", subsys_id); nfnl_lock(subsys_id); - ss = rcu_dereference_protected(table[subsys_id].subsys, - lockdep_is_held(&table[subsys_id].mutex)); + ss = nfnl_dereference_protected(subsys_id); if (!ss) #endif { -- cgit v1.1 From b4aae759c22e71a3c32144f0b3bc4f2fa4aaae98 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 10 Dec 2015 18:04:07 +0100 Subject: netfilter: meta: add support for setting skb->pkttype This allows to redirect bridged packets to local machine: ether type ip ether daddr set aa:53:08:12:34:56 meta pkttype set unicast Without 'set unicast', ip stack discards PACKET_OTHERHOST skbs. It is also useful to add support for a '-m cluster like' nft rule (where switch floods packets to several nodes, and each cluster node node processes a subset of packets for load distribution). Mangling is restricted to HOST/OTHER/BROAD/MULTICAST, i.e. you cannot set skb->pkt_type to PACKET_KERNEL or change PACKET_LOOPBACK to PACKET_HOST. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_meta.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5bcd1b0..fe885bf 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -26,6 +26,8 @@ #include #include +#include /* NF_BR_PRE_ROUTING */ + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -190,6 +192,13 @@ err: } EXPORT_SYMBOL_GPL(nft_meta_get_eval); +/* don't change or set _LOOPBACK, _USER, etc. */ +static bool pkt_type_ok(u32 p) +{ + return p == PACKET_HOST || p == PACKET_BROADCAST || + p == PACKET_MULTICAST || p == PACKET_OTHERHOST; +} + void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -205,6 +214,11 @@ void nft_meta_set_eval(const struct nft_expr *expr, case NFT_META_PRIORITY: skb->priority = value; break; + case NFT_META_PKTTYPE: + if (skb->pkt_type != value && + pkt_type_ok(value) && pkt_type_ok(skb->pkt_type)) + skb->pkt_type = value; + break; case NFT_META_NFTRACE: skb->nf_trace = 1; break; @@ -273,6 +287,24 @@ int nft_meta_get_init(const struct nft_ctx *ctx, } EXPORT_SYMBOL_GPL(nft_meta_get_init); +static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx) +{ + unsigned int hooks; + + switch (ctx->afi->family) { + case NFPROTO_BRIDGE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + case NFPROTO_NETDEV: + hooks = 1 << NF_NETDEV_INGRESS; + break; + default: + return -EOPNOTSUPP; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + int nft_meta_set_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -290,6 +322,12 @@ int nft_meta_set_init(const struct nft_ctx *ctx, case NFT_META_NFTRACE: len = sizeof(u8); break; + case NFT_META_PKTTYPE: + err = nft_meta_set_init_pkttype(ctx); + if (err) + return err; + len = sizeof(u8); + break; default: return -EOPNOTSUPP; } -- cgit v1.1