summaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-12-07 19:16:46 -0500
committerDavid S. Miller <davem@davemloft.net>2016-12-07 19:16:46 -0500
commit5fccd64aa44829f87997e3342698ef98862adffd (patch)
tree21cbd3c0975d1d90b4b29f513871853db09a4094 /include/net
parent63c36c40b9b031b760f89f5991843b6eeb6314e7 (diff)
parent73c25fb139337ac4fe1695ae3c056961855594db (diff)
downloadop-kernel-dev-5fccd64aa44829f87997e3342698ef98862adffd.zip
op-kernel-dev-5fccd64aa44829f87997e3342698ef98862adffd.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains a large Netfilter update for net-next, to summarise: 1) Add support for stateful objects. This series provides a nf_tables native alternative to the extended accounting infrastructure for nf_tables. Two initial stateful objects are supported: counters and quotas. Objects are identified by a user-defined name, you can fetch and reset them anytime. You can also use a maps to allow fast lookups using any arbitrary key combination. More info at: http://marc.info/?l=netfilter-devel&m=148029128323837&w=2 2) On-demand registration of nf_conntrack and defrag hooks per netns. Register nf_conntrack hooks if we have a stateful ruleset, ie. state-based filtering or NAT. The new nf_conntrack_default_on sysctl enables this from newly created netnamespaces. Default behaviour is not modified. Patches from Florian Westphal. 3) Allocate 4k chunks and then use these for x_tables counter allocation requests, this improves ruleset load time and also datapath ruleset evaluation, patches from Florian Westphal. 4) Add support for ebpf to the existing x_tables bpf extension. From Willem de Bruijn. 5) Update layer 4 checksum if any of the pseudoheader fields is updated. This provides a limited form of 1:1 stateless NAT that make sense in specific scenario, eg. load balancing. 6) Add support to flush sets in nf_tables. This series comes with a new set->ops->deactivate_one() indirection given that we have to walk over the list of set elements, then deactivate them one by one. The existing set->ops->deactivate() performs an element lookup that we don't need. 7) Two patches to avoid cloning packets, thus speed up packet forwarding via nft_fwd from ingress. From Florian Westphal. 8) Two IPVS patches via Simon Horman: Decrement ttl in all modes to prevent infinite loops, patch from Dwip Banerjee. And one minor refactoring from Gao feng. 9) Revisit recent log support for nf_tables netdev families: One patch to ensure that we correctly handle non-ethernet packets. Another patch to add missing logger definition for netdev. Patches from Liping Zhang. 10) Three patches for nft_fib, one to address insufficient register initialization and another to solve incorrect (although harmless) byteswap operation. Moreover update xt_rpfilter and nft_fib to match lbcast packets with zeronet as source, eg. DHCP Discover packets (0.0.0.0 -> 255.255.255.255). Also from Liping Zhang. 11) Built-in DCCP, SCTP and UDPlite conntrack and NAT support, from Davide Caratti. While DCCP is rather hopeless lately, and UDPlite has been broken in many-cast mode for some little time, let's give them a chance by placing them at the same level as other existing protocols. Thus, users don't explicitly have to modprobe support for this and NAT rules work for them. Some people point to the lack of support in SOHO Linux-based routers that make deployment of new protocols harder. I guess other middleboxes outthere on the Internet are also to blame. Anyway, let's see if this has any impact in the midrun. 12) Skip software SCTP software checksum calculation if the NIC comes with SCTP checksum offload support. From Davide Caratti. 13) Initial core factoring to prepare conversion to hook array. Three patches from Aaron Conole. 14) Gao Feng made a wrong conversion to switch in the xt_multiport extension in a patch coming in the previous batch. Fix it in this batch. 15) Get vmalloc call in sync with kmalloc flags to avoid a warning and likely OOM killer intervention from x_tables. From Marcelo Ricardo Leitner. 16) Update Arturo Borrero's email address in all source code headers. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/netfilter/ipv4/nf_conntrack_ipv4.h9
-rw-r--r--include/net/netfilter/ipv4/nf_defrag_ipv4.h3
-rw-r--r--include/net/netfilter/ipv6/nf_conntrack_ipv6.h9
-rw-r--r--include/net/netfilter/ipv6/nf_defrag_ipv6.h3
-rw-r--r--include/net/netfilter/nf_conntrack.h4
-rw-r--r--include/net/netfilter/nf_conntrack_l3proto.h16
-rw-r--r--include/net/netfilter/nf_dup_netdev.h1
-rw-r--r--include/net/netfilter/nf_log.h4
-rw-r--r--include/net/netfilter/nf_nat_l4proto.h9
-rw-r--r--include/net/netfilter/nf_tables.h101
-rw-r--r--include/net/netfilter/nf_tables_core.h1
-rw-r--r--include/net/netns/conntrack.h43
-rw-r--r--include/net/netns/netfilter.h6
13 files changed, 202 insertions, 7 deletions
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 981c327..919e4e8 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -15,6 +15,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4;
+#endif
int nf_conntrack_ipv4_compat_init(void);
void nf_conntrack_ipv4_compat_fini(void);
diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
index f01ef20..db405f7 100644
--- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h
@@ -1,6 +1,7 @@
#ifndef _NF_DEFRAG_IPV4_H
#define _NF_DEFRAG_IPV4_H
-void nf_defrag_ipv4_enable(void);
+struct net;
+int nf_defrag_ipv4_enable(struct net *);
#endif /* _NF_DEFRAG_IPV4_H */
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index a4c9936..eaea968 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -6,6 +6,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6;
+#endif
#include <linux/sysctl.h>
extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index ddf162f..7664efe 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -1,7 +1,8 @@
#ifndef _NF_DEFRAG_IPV6_H
#define _NF_DEFRAG_IPV6_H
-void nf_defrag_ipv6_enable(void);
+struct net;
+int nf_defrag_ipv6_enable(struct net *);
int nf_ct_frag6_init(void);
void nf_ct_frag6_cleanup(void);
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d9d52c0..5916aa9 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -181,6 +181,10 @@ static inline void nf_ct_put(struct nf_conn *ct)
int nf_ct_l3proto_try_module_get(unsigned short l3proto);
void nf_ct_l3proto_module_put(unsigned short l3proto);
+/* load module; enable/disable conntrack in this namespace */
+int nf_ct_netns_get(struct net *net, u8 nfproto);
+void nf_ct_netns_put(struct net *net, u8 nfproto);
+
/*
* Allocate a hashtable of hlist_head (if nulls == 0),
* or hlist_nulls_head (if nulls == 1)
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 8992e42..e01559b 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -52,6 +52,10 @@ struct nf_conntrack_l3proto {
int (*tuple_to_nlattr)(struct sk_buff *skb,
const struct nf_conntrack_tuple *t);
+ /* Called when netns wants to use connection tracking */
+ int (*net_ns_get)(struct net *);
+ void (*net_ns_put)(struct net *);
+
/*
* Calculate size of tuple nlattr
*/
@@ -63,18 +67,24 @@ struct nf_conntrack_l3proto {
size_t nla_size;
- /* Init l3proto pernet data */
- int (*init_net)(struct net *net);
-
/* Module (if any) which this is connected to. */
struct module *me;
};
extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
+#ifdef CONFIG_SYSCTL
/* Protocol pernet registration. */
int nf_ct_l3proto_pernet_register(struct net *net,
struct nf_conntrack_l3proto *proto);
+#else
+static inline int nf_ct_l3proto_pernet_register(struct net *n,
+ struct nf_conntrack_l3proto *p)
+{
+ return 0;
+}
+#endif
+
void nf_ct_l3proto_pernet_unregister(struct net *net,
struct nf_conntrack_l3proto *proto);
diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h
index 397dcae..3e91935 100644
--- a/include/net/netfilter/nf_dup_netdev.h
+++ b/include/net/netfilter/nf_dup_netdev.h
@@ -2,5 +2,6 @@
#define _NF_DUP_NETDEV_H_
void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif);
+void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif);
#endif
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index a559aa4..450f87f 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -109,7 +109,9 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
const struct net_device *out,
const struct nf_loginfo *loginfo,
const char *prefix);
-void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum,
+void nf_log_l2packet(struct net *net, u_int8_t pf,
+ __be16 protocol,
+ unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 12f4cc8..3923150 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -54,6 +54,15 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6;
extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
+#ifdef CONFIG_NF_NAT_PROTO_DCCP
+extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
+#endif
+#ifdef CONFIG_NF_NAT_PROTO_SCTP
+extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
+#endif
+#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
+extern const struct nf_nat_l4proto nf_nat_l4proto_udplite;
+#endif
bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 32970cb..924325c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -259,7 +259,8 @@ struct nft_expr;
* @lookup: look up an element within the set
* @insert: insert new element into set
* @activate: activate new element in the next generation
- * @deactivate: deactivate element in the next generation
+ * @deactivate: lookup for element and deactivate it in the next generation
+ * @deactivate_one: deactivate element in the next generation
* @remove: remove element from set
* @walk: iterate over all set elemeennts
* @privsize: function to return size of set private data
@@ -294,6 +295,9 @@ struct nft_set_ops {
void * (*deactivate)(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem);
+ bool (*deactivate_one)(const struct net *net,
+ const struct nft_set *set,
+ void *priv);
void (*remove)(const struct nft_set *set,
const struct nft_set_elem *elem);
void (*walk)(const struct nft_ctx *ctx,
@@ -326,6 +330,7 @@ void nft_unregister_set(struct nft_set_ops *ops);
* @name: name of the set
* @ktype: key type (numeric type defined by userspace, not used in the kernel)
* @dtype: data type (verdict or numeric type defined by userspace)
+ * @objtype: object type (see NFT_OBJECT_* definitions)
* @size: maximum set size
* @nelems: number of elements
* @ndeact: number of deactivated elements queued for removal
@@ -347,6 +352,7 @@ struct nft_set {
char name[NFT_SET_MAXNAMELEN];
u32 ktype;
u32 dtype;
+ u32 objtype;
u32 size;
atomic_t nelems;
u32 ndeact;
@@ -416,6 +422,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
* @NFT_SET_EXT_EXPIRATION: element expiration time
* @NFT_SET_EXT_USERDATA: user data associated with the element
* @NFT_SET_EXT_EXPR: expression assiociated with the element
+ * @NFT_SET_EXT_OBJREF: stateful object reference associated with element
* @NFT_SET_EXT_NUM: number of extension types
*/
enum nft_set_extensions {
@@ -426,6 +433,7 @@ enum nft_set_extensions {
NFT_SET_EXT_EXPIRATION,
NFT_SET_EXT_USERDATA,
NFT_SET_EXT_EXPR,
+ NFT_SET_EXT_OBJREF,
NFT_SET_EXT_NUM
};
@@ -554,6 +562,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
return elem + set->ops->elemsize;
}
+static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext)
+{
+ return nft_set_ext(ext, NFT_SET_EXT_OBJREF);
+}
+
void *nft_set_elem_init(const struct nft_set *set,
const struct nft_set_ext_tmpl *tmpl,
const u32 *key, const u32 *data,
@@ -875,6 +888,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
* @list: used internally
* @chains: chains in the table
* @sets: sets in the table
+ * @objects: stateful objects in the table
* @hgenerator: handle generator state
* @use: number of chain references to this table
* @flags: table flag (see enum nft_table_flags)
@@ -885,6 +899,7 @@ struct nft_table {
struct list_head list;
struct list_head chains;
struct list_head sets;
+ struct list_head objects;
u64 hgenerator;
u32 use;
u16 flags:14,
@@ -935,6 +950,80 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
const struct nft_verdict *v);
/**
+ * struct nft_object - nf_tables stateful object
+ *
+ * @list: table stateful object list node
+ * @table: table this object belongs to
+ * @type: pointer to object type
+ * @data: pointer to object data
+ * @name: name of this stateful object
+ * @genmask: generation mask
+ * @use: number of references to this stateful object
+ * @data: object data, layout depends on type
+ */
+struct nft_object {
+ struct list_head list;
+ char name[NFT_OBJ_MAXNAMELEN];
+ struct nft_table *table;
+ u32 genmask:2,
+ use:30;
+ /* runtime data below here */
+ const struct nft_object_type *type ____cacheline_aligned;
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(u64))));
+};
+
+static inline void *nft_obj_data(const struct nft_object *obj)
+{
+ return (void *)obj->data;
+}
+
+#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr))
+
+struct nft_object *nf_tables_obj_lookup(const struct nft_table *table,
+ const struct nlattr *nla, u32 objtype,
+ u8 genmask);
+
+int nft_obj_notify(struct net *net, struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq,
+ int event, int family, int report, gfp_t gfp);
+
+/**
+ * struct nft_object_type - stateful object type
+ *
+ * @eval: stateful object evaluation function
+ * @list: list node in list of object types
+ * @type: stateful object numeric type
+ * @size: stateful object size
+ * @owner: module owner
+ * @maxattr: maximum netlink attribute
+ * @policy: netlink attribute policy
+ * @init: initialize object from netlink attributes
+ * @destroy: release existing stateful object
+ * @dump: netlink dump stateful object
+ */
+struct nft_object_type {
+ void (*eval)(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt);
+ struct list_head list;
+ u32 type;
+ unsigned int size;
+ unsigned int maxattr;
+ struct module *owner;
+ const struct nla_policy *policy;
+ int (*init)(const struct nlattr * const tb[],
+ struct nft_object *obj);
+ void (*destroy)(struct nft_object *obj);
+ int (*dump)(struct sk_buff *skb,
+ struct nft_object *obj,
+ bool reset);
+};
+
+int nft_register_obj(struct nft_object_type *obj_type);
+void nft_unregister_obj(struct nft_object_type *obj_type);
+
+/**
* struct nft_traceinfo - nft tracing information and state
*
* @pkt: pktinfo currently processed
@@ -981,6 +1070,9 @@ void nft_trace_notify(struct nft_traceinfo *info);
#define MODULE_ALIAS_NFT_SET() \
MODULE_ALIAS("nft-set")
+#define MODULE_ALIAS_NFT_OBJ(type) \
+ MODULE_ALIAS("nft-obj-" __stringify(type))
+
/*
* The gencursor defines two generations, the currently active and the
* next one. Objects contain a bitmask of 2 bits specifying the generations
@@ -1157,4 +1249,11 @@ struct nft_trans_elem {
#define nft_trans_elem(trans) \
(((struct nft_trans_elem *)trans->data)->elem)
+struct nft_trans_obj {
+ struct nft_object *obj;
+};
+
+#define nft_trans_obj(trans) \
+ (((struct nft_trans_obj *)trans->data)->obj)
+
#endif /* _NET_NF_TABLES_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 862373d..8f690ef 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -45,6 +45,7 @@ struct nft_payload_set {
enum nft_registers sreg:8;
u8 csum_type;
u8 csum_offset;
+ u8 csum_flags;
};
extern const struct nft_expr_ops nft_payload_fast_ops;
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 3d06d94..cf799fc 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -6,6 +6,12 @@
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+#include <linux/netfilter/nf_conntrack_dccp.h>
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+#include <linux/netfilter/nf_conntrack_sctp.h>
+#endif
#include <linux/seqlock.h>
struct ctl_table_header;
@@ -48,12 +54,49 @@ struct nf_icmp_net {
unsigned int timeout;
};
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+struct nf_dccp_net {
+ struct nf_proto_net pn;
+ int dccp_loose;
+ unsigned int dccp_timeout[CT_DCCP_MAX + 1];
+};
+#endif
+
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+struct nf_sctp_net {
+ struct nf_proto_net pn;
+ unsigned int timeouts[SCTP_CONNTRACK_MAX];
+};
+#endif
+
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+enum udplite_conntrack {
+ UDPLITE_CT_UNREPLIED,
+ UDPLITE_CT_REPLIED,
+ UDPLITE_CT_MAX
+};
+
+struct nf_udplite_net {
+ struct nf_proto_net pn;
+ unsigned int timeouts[UDPLITE_CT_MAX];
+};
+#endif
+
struct nf_ip_net {
struct nf_generic_net generic;
struct nf_tcp_net tcp;
struct nf_udp_net udp;
struct nf_icmp_net icmp;
struct nf_icmp_net icmpv6;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ struct nf_dccp_net dccp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ struct nf_sctp_net sctp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ struct nf_udplite_net udplite;
+#endif
};
struct ct_pcpu {
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 58487b1..cea396b 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -17,5 +17,11 @@ struct netns_nf {
struct ctl_table_header *nf_log_dir_header;
#endif
struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ bool defrag_ipv4;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ bool defrag_ipv6;
+#endif
};
#endif
OpenPOWER on IntegriCloud