diff options
90 files changed, 2513 insertions, 661 deletions
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 433b672..497d668 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -96,6 +96,17 @@ nf_conntrack_max - INTEGER Size of connection tracking table. Default value is nf_conntrack_buckets value * 4. +nf_conntrack_default_on - BOOLEAN + 0 - don't register conntrack in new net namespaces + 1 - register conntrack in new net namespaces (default) + + This controls wheter newly created network namespaces have connection + tracking enabled by default. It will be enabled automatically + regardless of this setting if the new net namespace requires + connection tracking, e.g. when NAT rules are created. + This setting is only visible in initial user namespace, it has no + effect on existing namespaces. + nf_conntrack_tcp_be_liberal - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 6923014..a4b97be 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -75,10 +75,39 @@ struct nf_hook_ops { struct nf_hook_entry { struct nf_hook_entry __rcu *next; - struct nf_hook_ops ops; + nf_hookfn *hook; + void *priv; const struct nf_hook_ops *orig_ops; }; +static inline void +nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) +{ + entry->next = NULL; + entry->hook = ops->hook; + entry->priv = ops->priv; + entry->orig_ops = ops; +} + +static inline int +nf_hook_entry_priority(const struct nf_hook_entry *entry) +{ + return entry->orig_ops->priority; +} + +static inline int +nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, + struct nf_hook_state *state) +{ + return entry->hook(entry->priv, skb, state); +} + +static inline const struct nf_hook_ops * +nf_hook_entry_ops(const struct nf_hook_entry *entry) +{ + return entry->orig_ops; +} + static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h index 40dcc82..ff721d7 100644 --- a/include/linux/netfilter/nf_conntrack_dccp.h +++ b/include/linux/netfilter/nf_conntrack_dccp.h @@ -25,7 +25,7 @@ enum ct_dccp_roles { #define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) #ifdef __KERNEL__ -#include <net/netfilter/nf_conntrack_tuple.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> struct nf_ct_dccp { u_int8_t role[IP_CT_DIR_MAX]; diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index cd4eaf8..5117e4d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -403,38 +403,14 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } +struct xt_percpu_counter_alloc_state { + unsigned int off; + const char __percpu *mem; +}; -/* On SMP, ip(6)t_entry->counters.pcnt holds address of the - * real (percpu) counter. On !SMP, its just the packet count, - * so nothing needs to be done there. - * - * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. We force an alignment of 16 bytes - * so that bytes/packets share a common cache line. - * - * Hence caller must use IS_ERR_VALUE to check for error, this - * allows us to return 0 for single core systems without forcing - * callers to deal with SMP vs. NONSMP issues. - */ -static inline unsigned long xt_percpu_counter_alloc(void) -{ - if (nr_cpu_ids > 1) { - void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), - sizeof(struct xt_counters)); - - if (res == NULL) - return -ENOMEM; - - return (__force unsigned long) res; - } - - return 0; -} -static inline void xt_percpu_counter_free(u64 pcnt) -{ - if (nr_cpu_ids > 1) - free_percpu((void __percpu *) (unsigned long) pcnt); -} +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter); +void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 2dc3b49..5947606 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,6 +19,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; + int ret; /* Must recheck the ingress hook head, in the event it became NULL * after the check in nf_hook_ingress_active evaluated to true. @@ -29,7 +30,11 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state, e); + ret = nf_hook_slow(skb, &state, e); + if (ret == 0) + return -1; + + return ret; } static inline void nf_hook_ingress_init(struct net_device *dev) diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 981c327..919e4e8 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -15,6 +15,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4; +#endif int nf_conntrack_ipv4_compat_init(void); void nf_conntrack_ipv4_compat_fini(void); diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h index f01ef20..db405f7 100644 --- a/include/net/netfilter/ipv4/nf_defrag_ipv4.h +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -1,6 +1,7 @@ #ifndef _NF_DEFRAG_IPV4_H #define _NF_DEFRAG_IPV4_H -void nf_defrag_ipv4_enable(void); +struct net; +int nf_defrag_ipv4_enable(struct net *); #endif /* _NF_DEFRAG_IPV4_H */ diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index a4c9936..eaea968 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -6,6 +6,15 @@ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6; extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6; +#endif #include <linux/sysctl.h> extern struct ctl_table nf_ct_ipv6_sysctl_table[]; diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h index ddf162f..7664efe 100644 --- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -1,7 +1,8 @@ #ifndef _NF_DEFRAG_IPV6_H #define _NF_DEFRAG_IPV6_H -void nf_defrag_ipv6_enable(void); +struct net; +int nf_defrag_ipv6_enable(struct net *); int nf_ct_frag6_init(void); void nf_ct_frag6_cleanup(void); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d9d52c0..5916aa9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -181,6 +181,10 @@ static inline void nf_ct_put(struct nf_conn *ct) int nf_ct_l3proto_try_module_get(unsigned short l3proto); void nf_ct_l3proto_module_put(unsigned short l3proto); +/* load module; enable/disable conntrack in this namespace */ +int nf_ct_netns_get(struct net *net, u8 nfproto); +void nf_ct_netns_put(struct net *net, u8 nfproto); + /* * Allocate a hashtable of hlist_head (if nulls == 0), * or hlist_nulls_head (if nulls == 1) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 8992e42..e01559b 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -52,6 +52,10 @@ struct nf_conntrack_l3proto { int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); + /* * Calculate size of tuple nlattr */ @@ -63,18 +67,24 @@ struct nf_conntrack_l3proto { size_t nla_size; - /* Init l3proto pernet data */ - int (*init_net)(struct net *net); - /* Module (if any) which this is connected to. */ struct module *me; }; extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; +#ifdef CONFIG_SYSCTL /* Protocol pernet registration. */ int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto); +#else +static inline int nf_ct_l3proto_pernet_register(struct net *n, + struct nf_conntrack_l3proto *p) +{ + return 0; +} +#endif + void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto); diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h index 397dcae..3e91935 100644 --- a/include/net/netfilter/nf_dup_netdev.h +++ b/include/net/netfilter/nf_dup_netdev.h @@ -2,5 +2,6 @@ #define _NF_DUP_NETDEV_H_ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif); +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif); #endif diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index a559aa4..450f87f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -109,7 +109,9 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix); -void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum, +void nf_log_l2packet(struct net *net, u_int8_t pf, + __be16 protocol, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 12f4cc8..3923150 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -54,6 +54,15 @@ extern const struct nf_nat_l4proto nf_nat_l4proto_udp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; +#ifdef CONFIG_NF_NAT_PROTO_DCCP +extern const struct nf_nat_l4proto nf_nat_l4proto_dccp; +#endif +#ifdef CONFIG_NF_NAT_PROTO_SCTP +extern const struct nf_nat_l4proto nf_nat_l4proto_sctp; +#endif +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE +extern const struct nf_nat_l4proto nf_nat_l4proto_udplite; +#endif bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 32970cb..924325c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -259,7 +259,8 @@ struct nft_expr; * @lookup: look up an element within the set * @insert: insert new element into set * @activate: activate new element in the next generation - * @deactivate: deactivate element in the next generation + * @deactivate: lookup for element and deactivate it in the next generation + * @deactivate_one: deactivate element in the next generation * @remove: remove element from set * @walk: iterate over all set elemeennts * @privsize: function to return size of set private data @@ -294,6 +295,9 @@ struct nft_set_ops { void * (*deactivate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); + bool (*deactivate_one)(const struct net *net, + const struct nft_set *set, + void *priv); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); void (*walk)(const struct nft_ctx *ctx, @@ -326,6 +330,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @name: name of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) * @dtype: data type (verdict or numeric type defined by userspace) + * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal @@ -347,6 +352,7 @@ struct nft_set { char name[NFT_SET_MAXNAMELEN]; u32 ktype; u32 dtype; + u32 objtype; u32 size; atomic_t nelems; u32 ndeact; @@ -416,6 +422,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, * @NFT_SET_EXT_EXPIRATION: element expiration time * @NFT_SET_EXT_USERDATA: user data associated with the element * @NFT_SET_EXT_EXPR: expression assiociated with the element + * @NFT_SET_EXT_OBJREF: stateful object reference associated with element * @NFT_SET_EXT_NUM: number of extension types */ enum nft_set_extensions { @@ -426,6 +433,7 @@ enum nft_set_extensions { NFT_SET_EXT_EXPIRATION, NFT_SET_EXT_USERDATA, NFT_SET_EXT_EXPR, + NFT_SET_EXT_OBJREF, NFT_SET_EXT_NUM }; @@ -554,6 +562,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, return elem + set->ops->elemsize; } +static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) +{ + return nft_set_ext(ext, NFT_SET_EXT_OBJREF); +} + void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, @@ -875,6 +888,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @list: used internally * @chains: chains in the table * @sets: sets in the table + * @objects: stateful objects in the table * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) @@ -885,6 +899,7 @@ struct nft_table { struct list_head list; struct list_head chains; struct list_head sets; + struct list_head objects; u64 hgenerator; u32 use; u16 flags:14, @@ -935,6 +950,80 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v); /** + * struct nft_object - nf_tables stateful object + * + * @list: table stateful object list node + * @table: table this object belongs to + * @type: pointer to object type + * @data: pointer to object data + * @name: name of this stateful object + * @genmask: generation mask + * @use: number of references to this stateful object + * @data: object data, layout depends on type + */ +struct nft_object { + struct list_head list; + char name[NFT_OBJ_MAXNAMELEN]; + struct nft_table *table; + u32 genmask:2, + use:30; + /* runtime data below here */ + const struct nft_object_type *type ____cacheline_aligned; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); +}; + +static inline void *nft_obj_data(const struct nft_object *obj) +{ + return (void *)obj->data; +} + +#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) + +struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, u32 objtype, + u8 genmask); + +int nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, + int event, int family, int report, gfp_t gfp); + +/** + * struct nft_object_type - stateful object type + * + * @eval: stateful object evaluation function + * @list: list node in list of object types + * @type: stateful object numeric type + * @size: stateful object size + * @owner: module owner + * @maxattr: maximum netlink attribute + * @policy: netlink attribute policy + * @init: initialize object from netlink attributes + * @destroy: release existing stateful object + * @dump: netlink dump stateful object + */ +struct nft_object_type { + void (*eval)(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + struct list_head list; + u32 type; + unsigned int size; + unsigned int maxattr; + struct module *owner; + const struct nla_policy *policy; + int (*init)(const struct nlattr * const tb[], + struct nft_object *obj); + void (*destroy)(struct nft_object *obj); + int (*dump)(struct sk_buff *skb, + struct nft_object *obj, + bool reset); +}; + +int nft_register_obj(struct nft_object_type *obj_type); +void nft_unregister_obj(struct nft_object_type *obj_type); + +/** * struct nft_traceinfo - nft tracing information and state * * @pkt: pktinfo currently processed @@ -981,6 +1070,9 @@ void nft_trace_notify(struct nft_traceinfo *info); #define MODULE_ALIAS_NFT_SET() \ MODULE_ALIAS("nft-set") +#define MODULE_ALIAS_NFT_OBJ(type) \ + MODULE_ALIAS("nft-obj-" __stringify(type)) + /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations @@ -1157,4 +1249,11 @@ struct nft_trans_elem { #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +struct nft_trans_obj { + struct nft_object *obj; +}; + +#define nft_trans_obj(trans) \ + (((struct nft_trans_obj *)trans->data)->obj) + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 862373d..8f690ef 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -45,6 +45,7 @@ struct nft_payload_set { enum nft_registers sreg:8; u8 csum_type; u8 csum_offset; + u8 csum_flags; }; extern const struct nft_expr_ops nft_payload_fast_ops; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 3d06d94..cf799fc 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -6,6 +6,12 @@ #include <linux/atomic.h> #include <linux/workqueue.h> #include <linux/netfilter/nf_conntrack_tcp.h> +#ifdef CONFIG_NF_CT_PROTO_DCCP +#include <linux/netfilter/nf_conntrack_dccp.h> +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP +#include <linux/netfilter/nf_conntrack_sctp.h> +#endif #include <linux/seqlock.h> struct ctl_table_header; @@ -48,12 +54,49 @@ struct nf_icmp_net { unsigned int timeout; }; +#ifdef CONFIG_NF_CT_PROTO_DCCP +struct nf_dccp_net { + struct nf_proto_net pn; + int dccp_loose; + unsigned int dccp_timeout[CT_DCCP_MAX + 1]; +}; +#endif + +#ifdef CONFIG_NF_CT_PROTO_SCTP +struct nf_sctp_net { + struct nf_proto_net pn; + unsigned int timeouts[SCTP_CONNTRACK_MAX]; +}; +#endif + +#ifdef CONFIG_NF_CT_PROTO_UDPLITE +enum udplite_conntrack { + UDPLITE_CT_UNREPLIED, + UDPLITE_CT_REPLIED, + UDPLITE_CT_MAX +}; + +struct nf_udplite_net { + struct nf_proto_net pn; + unsigned int timeouts[UDPLITE_CT_MAX]; +}; +#endif + struct nf_ip_net { struct nf_generic_net generic; struct nf_tcp_net tcp; struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; +#ifdef CONFIG_NF_CT_PROTO_DCCP + struct nf_dccp_net dccp; +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + struct nf_sctp_net sctp; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + struct nf_udplite_net udplite; +#endif }; struct ct_pcpu { diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 58487b1..cea396b 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -17,5 +17,11 @@ struct netns_nf { struct ctl_table_header *nf_log_dir_header; #endif struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) + bool defrag_ipv4; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + bool defrag_ipv6; +#endif }; #endif diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h index a9c3834..526b424 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h @@ -2,7 +2,10 @@ #define _NF_CONNTRACK_TUPLE_COMMON_H #include <linux/types.h> +#ifndef __KERNEL__ #include <linux/netfilter.h> +#endif +#include <linux/netfilter/nf_conntrack_common.h> /* IP_CT_IS_REPLY */ enum ip_conntrack_dir { IP_CT_DIR_ORIGINAL, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 14e5f61..881d49e 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -4,6 +4,7 @@ #define NFT_TABLE_MAXNAMELEN 32 #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_SET_MAXNAMELEN 32 +#define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 /** @@ -85,6 +86,10 @@ enum nft_verdicts { * @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes) * @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes) * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes) + * @NFT_MSG_NEWOBJ: create a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ: get a stateful object (enum nft_obj_attributes) + * @NFT_MSG_DELOBJ: delete a stateful object (enum nft_obj_attributes) + * @NFT_MSG_GETOBJ_RESET: get and reset a stateful object (enum nft_obj_attributes) */ enum nf_tables_msg_types { NFT_MSG_NEWTABLE, @@ -105,6 +110,10 @@ enum nf_tables_msg_types { NFT_MSG_NEWGEN, NFT_MSG_GETGEN, NFT_MSG_TRACE, + NFT_MSG_NEWOBJ, + NFT_MSG_GETOBJ, + NFT_MSG_DELOBJ, + NFT_MSG_GETOBJ_RESET, NFT_MSG_MAX, }; @@ -246,6 +255,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_MAP: set is used as a dictionary * @NFT_SET_TIMEOUT: set uses timeouts * @NFT_SET_EVAL: set contains expressions for evaluation + * @NFT_SET_OBJECT: set contains stateful objects */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -254,6 +264,7 @@ enum nft_set_flags { NFT_SET_MAP = 0x8, NFT_SET_TIMEOUT = 0x10, NFT_SET_EVAL = 0x20, + NFT_SET_OBJECT = 0x40, }; /** @@ -295,6 +306,7 @@ enum nft_set_desc_attributes { * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64) * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32) * @NFTA_SET_USERDATA: user data (NLA_BINARY) + * @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*) */ enum nft_set_attributes { NFTA_SET_UNSPEC, @@ -312,6 +324,7 @@ enum nft_set_attributes { NFTA_SET_GC_INTERVAL, NFTA_SET_USERDATA, NFTA_SET_PAD, + NFTA_SET_OBJ_TYPE, __NFTA_SET_MAX }; #define NFTA_SET_MAX (__NFTA_SET_MAX - 1) @@ -335,6 +348,7 @@ enum nft_set_elem_flags { * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64) * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY) * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_SET_ELEM_OBJREF: stateful object reference (NLA_STRING) */ enum nft_set_elem_attributes { NFTA_SET_ELEM_UNSPEC, @@ -346,6 +360,7 @@ enum nft_set_elem_attributes { NFTA_SET_ELEM_USERDATA, NFTA_SET_ELEM_EXPR, NFTA_SET_ELEM_PAD, + NFTA_SET_ELEM_OBJREF, __NFTA_SET_ELEM_MAX }; #define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1) @@ -659,6 +674,10 @@ enum nft_payload_csum_types { NFT_PAYLOAD_CSUM_INET, }; +enum nft_payload_csum_flags { + NFT_PAYLOAD_L4CSUM_PSEUDOHDR = (1 << 0), +}; + /** * enum nft_payload_attributes - nf_tables payload expression netlink attributes * @@ -669,6 +688,7 @@ enum nft_payload_csum_types { * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers) * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32) * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32) + * @NFTA_PAYLOAD_CSUM_FLAGS: checksum flags (NLA_U32) */ enum nft_payload_attributes { NFTA_PAYLOAD_UNSPEC, @@ -679,6 +699,7 @@ enum nft_payload_attributes { NFTA_PAYLOAD_SREG, NFTA_PAYLOAD_CSUM_TYPE, NFTA_PAYLOAD_CSUM_OFFSET, + NFTA_PAYLOAD_CSUM_FLAGS, __NFTA_PAYLOAD_MAX }; #define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1) @@ -968,6 +989,7 @@ enum nft_queue_attributes { enum nft_quota_flags { NFT_QUOTA_F_INV = (1 << 0), + NFT_QUOTA_F_DEPLETED = (1 << 1), }; /** @@ -975,12 +997,14 @@ enum nft_quota_flags { * * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16) * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + * @NFTA_QUOTA_CONSUMED: quota already consumed in bytes (NLA_U64) */ enum nft_quota_attributes { NFTA_QUOTA_UNSPEC, NFTA_QUOTA_BYTES, NFTA_QUOTA_FLAGS, NFTA_QUOTA_PAD, + NFTA_QUOTA_CONSUMED, __NFTA_QUOTA_MAX }; #define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) @@ -1125,6 +1149,26 @@ enum nft_fwd_attributes { #define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1) /** + * enum nft_objref_attributes - nf_tables stateful object expression netlink attributes + * + * @NFTA_OBJREF_IMM_TYPE: object type for immediate reference (NLA_U32: nft_register) + * @NFTA_OBJREF_IMM_NAME: object name for immediate reference (NLA_STRING) + * @NFTA_OBJREF_SET_SREG: source register of the data to look for (NLA_U32: nft_registers) + * @NFTA_OBJREF_SET_NAME: name of the set where to look for (NLA_STRING) + * @NFTA_OBJREF_SET_ID: id of the set where to look for in this transaction (NLA_U32) + */ +enum nft_objref_attributes { + NFTA_OBJREF_UNSPEC, + NFTA_OBJREF_IMM_TYPE, + NFTA_OBJREF_IMM_NAME, + NFTA_OBJREF_SET_SREG, + NFTA_OBJREF_SET_NAME, + NFTA_OBJREF_SET_ID, + __NFTA_OBJREF_MAX +}; +#define NFTA_OBJREF_MAX (__NFTA_OBJREF_MAX - 1) + +/** * enum nft_gen_attributes - nf_tables ruleset generation attributes * * @NFTA_GEN_ID: Ruleset generation ID (NLA_U32) @@ -1172,6 +1216,32 @@ enum nft_fib_flags { NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */ }; +#define NFT_OBJECT_UNSPEC 0 +#define NFT_OBJECT_COUNTER 1 +#define NFT_OBJECT_QUOTA 2 +#define __NFT_OBJECT_MAX 3 +#define NFT_OBJECT_MAX (__NFT_OBJECT_MAX - 1) + +/** + * enum nft_object_attributes - nf_tables stateful object netlink attributes + * + * @NFTA_OBJ_TABLE: name of the table containing the expression (NLA_STRING) + * @NFTA_OBJ_NAME: name of this expression type (NLA_STRING) + * @NFTA_OBJ_TYPE: stateful object type (NLA_U32) + * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) + * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) + */ +enum nft_object_attributes { + NFTA_OBJ_UNSPEC, + NFTA_OBJ_TABLE, + NFTA_OBJ_NAME, + NFTA_OBJ_TYPE, + NFTA_OBJ_DATA, + NFTA_OBJ_USE, + __NFTA_OBJ_MAX +}; +#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) + /** * enum nft_trace_attributes - nf_tables trace netlink attributes * diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h index 1fad2c2..b97725a 100644 --- a/include/uapi/linux/netfilter/xt_bpf.h +++ b/include/uapi/linux/netfilter/xt_bpf.h @@ -2,9 +2,11 @@ #define _XT_BPF_H #include <linux/filter.h> +#include <linux/limits.h> #include <linux/types.h> #define XT_BPF_MAX_NUM_INSTR 64 +#define XT_BPF_PATH_MAX (XT_BPF_MAX_NUM_INSTR * sizeof(struct sock_filter)) struct bpf_prog; @@ -16,4 +18,23 @@ struct xt_bpf_info { struct bpf_prog *filter __attribute__((aligned(8))); }; +enum xt_bpf_modes { + XT_BPF_MODE_BYTECODE, + XT_BPF_MODE_FD_PINNED, + XT_BPF_MODE_FD_ELF, +}; + +struct xt_bpf_info_v1 { + __u16 mode; + __u16 bpf_program_num_elem; + __s32 fd; + union { + struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR]; + char path[XT_BPF_PATH_MAX]; + }; + + /* only used in the kernel */ + struct bpf_prog *filter __attribute__((aligned(8))); +}; + #endif /*_XT_BPF_H */ diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 83d937f..b12501a 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1008,10 +1008,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, struct nf_hook_state state; int ret; - elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - - while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) - elem = rcu_dereference(elem->next); + for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); + elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF; + elem = rcu_dereference(elem->next)) + ; if (!elem) return okfn(net, sk, skb); diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index c197b1f..bd2b3c7 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -24,7 +24,8 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); + nf_log_l2packet(net, pf, eth_hdr(skb)->h_proto, hooknum, skb, + in, out, loginfo, prefix); } static struct nf_logger nf_bridge_logger __read_mostly = { diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 848a070..1258a9a 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -411,17 +411,15 @@ static inline int check_target(struct arpt_entry *e, const char *name) } static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) +find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; - unsigned long pcnt; int ret; - pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(pcnt)) + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; - e->counters.pcnt = pcnt; t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, @@ -439,7 +437,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) err: module_put(t->u.kernel.target->me); out: - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); return ret; } @@ -519,7 +517,7 @@ static inline void cleanup_entry(struct arpt_entry *e) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in @@ -528,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e) static int translate_table(struct xt_table_info *newinfo, void *entry0, const struct arpt_replace *repl) { + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct arpt_entry *iter; unsigned int *offsets; unsigned int i; @@ -590,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, repl->name, repl->size); + ret = find_check_entry(iter, repl->name, repl->size, + &alloc_state); if (ret != 0) break; ++i; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 46815c8..308b456 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -531,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name) static int find_check_entry(struct ipt_entry *e, struct net *net, const char *name, - unsigned int size) + unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; @@ -539,12 +540,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - unsigned long pcnt; - pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(pcnt)) + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; - e->counters.pcnt = pcnt; j = 0; mtpar.net = net; @@ -582,7 +580,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, cleanup_match(ematch, net); } - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); return ret; } @@ -670,7 +668,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in @@ -679,6 +677,7 @@ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ipt_replace *repl) { + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ipt_entry *iter; unsigned int *offsets; unsigned int i; @@ -738,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, repl->name, repl->size); + ret = find_check_entry(iter, net, repl->name, repl->size, + &alloc_state); if (ret != 0) break; ++i; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index e6e206f..21db00d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -419,7 +419,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) } cipinfo->config = config; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -444,7 +444,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_get(par->net, par->family); } #ifdef CONFIG_COMPAT diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 34cfb9b..a03e4e7 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -41,7 +41,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par) pr_debug("bad rangesize %u\n", mr->rangesize); return -EINVAL; } - return 0; + return nf_ct_netns_get(par->net, par->family); } static unsigned int @@ -59,6 +59,11 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) xt_out(par)); } +static void masquerade_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); +} + static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", .family = NFPROTO_IPV4, @@ -67,6 +72,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = { .table = "nat", .hooks = 1 << NF_INET_POST_ROUTING, .checkentry = masquerade_tg_check, + .destroy = masquerade_tg_destroy, .me = THIS_MODULE, }; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 3614116..30c0de5 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -418,12 +418,12 @@ static int synproxy_tg4_check(const struct xt_tgchk_param *par) e->ip.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_l3proto_try_module_get(par->family); + return nf_ct_netns_get(par->net, par->family); } static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target synproxy_tg4_reg __read_mostly = { diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 59b4994..f273098 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -83,10 +83,12 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return true ^ invert; iph = ip_hdr(skb); - if (ipv4_is_multicast(iph->daddr)) { - if (ipv4_is_zeronet(iph->saddr)) - return ipv4_is_local_multicast(iph->daddr) ^ invert; + if (ipv4_is_zeronet(iph->saddr)) { + if (ipv4_is_lbcast(iph->daddr) || + ipv4_is_local_multicast(iph->daddr)) + return true ^ invert; } + flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 7130ed5..fcfd071 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -31,6 +31,13 @@ #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #include <net/netfilter/nf_log.h> +static int conntrack4_net_id __read_mostly; +static DEFINE_MUTEX(register_ipv4_hooks); + +struct conntrack4_net { + unsigned int users; +}; + static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -307,9 +314,42 @@ static struct nf_sockopt_ops so_getorigdst = { .owner = THIS_MODULE, }; -static int ipv4_init_net(struct net *net) +static int ipv4_hooks_register(struct net *net) { - return 0; + struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); + int err = 0; + + mutex_lock(®ister_ipv4_hooks); + + cnet->users++; + if (cnet->users > 1) + goto out_unlock; + + err = nf_defrag_ipv4_enable(net); + if (err) { + cnet->users = 0; + goto out_unlock; + } + + err = nf_register_net_hooks(net, ipv4_conntrack_ops, + ARRAY_SIZE(ipv4_conntrack_ops)); + + if (err) + cnet->users = 0; + out_unlock: + mutex_unlock(®ister_ipv4_hooks); + return err; +} + +static void ipv4_hooks_unregister(struct net *net) +{ + struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); + + mutex_lock(®ister_ipv4_hooks); + if (cnet->users && (--cnet->users == 0)) + nf_unregister_net_hooks(net, ipv4_conntrack_ops, + ARRAY_SIZE(ipv4_conntrack_ops)); + mutex_unlock(®ister_ipv4_hooks); } struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { @@ -325,7 +365,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, #endif - .init_net = ipv4_init_net, + .net_ns_get = ipv4_hooks_register, + .net_ns_put = ipv4_hooks_unregister, .me = THIS_MODULE, }; @@ -340,6 +381,15 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = { &nf_conntrack_l4proto_tcp4, &nf_conntrack_l4proto_udp4, &nf_conntrack_l4proto_icmp, +#ifdef CONFIG_NF_CT_PROTO_DCCP + &nf_conntrack_l4proto_dccp4, +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + &nf_conntrack_l4proto_sctp4, +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + &nf_conntrack_l4proto_udplite4, +#endif }; static int ipv4_net_init(struct net *net) @@ -369,6 +419,8 @@ static void ipv4_net_exit(struct net *net) static struct pernet_operations ipv4_net_ops = { .init = ipv4_net_init, .exit = ipv4_net_exit, + .id = &conntrack4_net_id, + .size = sizeof(struct conntrack4_net), }; static int __init nf_conntrack_l3proto_ipv4_init(void) @@ -376,7 +428,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); - nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { @@ -390,17 +441,10 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_sockopt; } - ret = nf_register_hooks(ipv4_conntrack_ops, - ARRAY_SIZE(ipv4_conntrack_ops)); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: can't register hooks.\n"); - goto cleanup_pernet; - } - ret = nf_ct_l4proto_register(builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); if (ret < 0) - goto cleanup_hooks; + goto cleanup_pernet; ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); if (ret < 0) { @@ -412,8 +456,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) cleanup_l4proto: nf_ct_l4proto_unregister(builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); - cleanup_hooks: - nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); cleanup_pernet: unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: @@ -427,7 +469,6 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void) nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); nf_ct_l4proto_unregister(builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); - nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); unregister_pernet_subsys(&ipv4_net_ops); nf_unregister_sockopt(&so_getorigdst); } diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index d88da36b..49bd6a5 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -11,6 +11,7 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/skbuff.h> +#include <net/netns/generic.h> #include <net/route.h> #include <net/ip.h> @@ -22,6 +23,8 @@ #endif #include <net/netfilter/nf_conntrack_zones.h> +static DEFINE_MUTEX(defrag4_mutex); + static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, u_int32_t user) { @@ -102,18 +105,50 @@ static struct nf_hook_ops ipv4_defrag_ops[] = { }, }; +static void __net_exit defrag4_net_exit(struct net *net) +{ + if (net->nf.defrag_ipv4) { + nf_unregister_net_hooks(net, ipv4_defrag_ops, + ARRAY_SIZE(ipv4_defrag_ops)); + net->nf.defrag_ipv4 = false; + } +} + +static struct pernet_operations defrag4_net_ops = { + .exit = defrag4_net_exit, +}; + static int __init nf_defrag_init(void) { - return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); + return register_pernet_subsys(&defrag4_net_ops); } static void __exit nf_defrag_fini(void) { - nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); + unregister_pernet_subsys(&defrag4_net_ops); } -void nf_defrag_ipv4_enable(void) +int nf_defrag_ipv4_enable(struct net *net) { + int err = 0; + + might_sleep(); + + if (net->nf.defrag_ipv4) + return 0; + + mutex_lock(&defrag4_mutex); + if (net->nf.defrag_ipv4) + goto out_unlock; + + err = nf_register_net_hooks(net, ipv4_defrag_ops, + ARRAY_SIZE(ipv4_defrag_ops)); + if (err == 0) + net->nf.defrag_ipv4 = true; + + out_unlock: + mutex_unlock(&defrag4_mutex); + return err; } EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 1b49966..965b1a1 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -101,12 +101,13 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, } iph = ip_hdr(pkt->skb); - if (ipv4_is_multicast(iph->daddr) && - ipv4_is_zeronet(iph->saddr) && - ipv4_is_local_multicast(iph->daddr)) { - nft_fib_store_result(dest, priv->result, pkt, - get_ifindex(pkt->skb->dev)); - return; + if (ipv4_is_zeronet(iph->saddr)) { + if (ipv4_is_lbcast(iph->daddr) || + ipv4_is_local_multicast(iph->daddr)) { + nft_fib_store_result(dest, priv->result, pkt, + get_ifindex(pkt->skb->dev)); + return; + } } if (priv->flags & NFTA_FIB_F_MARK) @@ -122,6 +123,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, fl4.saddr = get_saddr(iph->daddr); } + *dest = 0; + if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return; @@ -198,7 +201,7 @@ nft_fib4_select_ops(const struct nft_ctx *ctx, if (!tb[NFTA_FIB_RESULT]) return ERR_PTR(-EINVAL); - result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); switch (result) { case NFT_FIB_RESULT_OIF: diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 4f697e4..a0ea8aa 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -35,12 +35,19 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, &range, nft_out(pkt)); } +static void +nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV4); +} + static struct nft_expr_type nft_masq_ipv4_type; static const struct nft_expr_ops nft_masq_ipv4_ops = { .type = &nft_masq_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), .eval = nft_masq_ipv4_eval, .init = nft_masq_init, + .destroy = nft_masq_ipv4_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, }; @@ -77,5 +84,5 @@ module_init(nft_masq_ipv4_module_init); module_exit(nft_masq_ipv4_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq"); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 16df049..1650ed2 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -38,12 +38,19 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt)); } +static void +nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV4); +} + static struct nft_expr_type nft_redir_ipv4_type; static const struct nft_expr_ops nft_redir_ipv4_ops = { .type = &nft_redir_ipv4_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_ipv4_eval, .init = nft_redir_init, + .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, }; @@ -71,5 +78,5 @@ module_init(nft_redir_ipv4_module_init); module_exit(nft_redir_ipv4_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6ff42b8..d56d8ac 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -562,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name) static int find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, - unsigned int size) + unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; @@ -570,12 +571,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - unsigned long pcnt; - pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(pcnt)) + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; - e->counters.pcnt = pcnt; j = 0; mtpar.net = net; @@ -612,7 +610,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, cleanup_match(ematch, net); } - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); return ret; } @@ -699,8 +697,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in @@ -709,6 +706,7 @@ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ip6t_entry *iter; unsigned int *offsets; unsigned int i; @@ -768,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, repl->name, repl->size); + ret = find_check_entry(iter, net, repl->name, repl->size, + &alloc_state); if (ret != 0) break; ++i; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 99a1216..98c8dd3 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -440,12 +440,12 @@ static int synproxy_tg6_check(const struct xt_tgchk_param *par) e->ipv6.invflags & XT_INV_PROTO) return -EINVAL; - return nf_ct_l3proto_try_module_get(par->family); + return nf_ct_netns_get(par->net, par->family); } static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target synproxy_tg6_reg __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 500be28..4e34024 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -34,6 +34,13 @@ #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_log.h> +static int conntrack6_net_id; +static DEFINE_MUTEX(register_ipv6_hooks); + +struct conntrack6_net { + unsigned int users; +}; + static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) { @@ -308,6 +315,42 @@ static int ipv6_nlattr_tuple_size(void) } #endif +static int ipv6_hooks_register(struct net *net) +{ + struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); + int err = 0; + + mutex_lock(®ister_ipv6_hooks); + cnet->users++; + if (cnet->users > 1) + goto out_unlock; + + err = nf_defrag_ipv6_enable(net); + if (err < 0) { + cnet->users = 0; + goto out_unlock; + } + + err = nf_register_net_hooks(net, ipv6_conntrack_ops, + ARRAY_SIZE(ipv6_conntrack_ops)); + if (err) + cnet->users = 0; + out_unlock: + mutex_unlock(®ister_ipv6_hooks); + return err; +} + +static void ipv6_hooks_unregister(struct net *net) +{ + struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); + + mutex_lock(®ister_ipv6_hooks); + if (cnet->users && (--cnet->users == 0)) + nf_unregister_net_hooks(net, ipv6_conntrack_ops, + ARRAY_SIZE(ipv6_conntrack_ops)); + mutex_unlock(®ister_ipv6_hooks); +} + struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, .name = "ipv6", @@ -321,6 +364,8 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .nlattr_to_tuple = ipv6_nlattr_to_tuple, .nla_policy = ipv6_nla_policy, #endif + .net_ns_get = ipv6_hooks_register, + .net_ns_put = ipv6_hooks_unregister, .me = THIS_MODULE, }; @@ -340,6 +385,15 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = { &nf_conntrack_l4proto_tcp6, &nf_conntrack_l4proto_udp6, &nf_conntrack_l4proto_icmpv6, +#ifdef CONFIG_NF_CT_PROTO_DCCP + &nf_conntrack_l4proto_dccp6, +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + &nf_conntrack_l4proto_sctp6, +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + &nf_conntrack_l4proto_udplite6, +#endif }; static int ipv6_net_init(struct net *net) @@ -370,6 +424,8 @@ static void ipv6_net_exit(struct net *net) static struct pernet_operations ipv6_net_ops = { .init = ipv6_net_init, .exit = ipv6_net_exit, + .id = &conntrack6_net_id, + .size = sizeof(struct conntrack6_net), }; static int __init nf_conntrack_l3proto_ipv6_init(void) @@ -377,7 +433,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) int ret = 0; need_conntrack(); - nf_defrag_ipv6_enable(); ret = nf_register_sockopt(&so_getorigdst6); if (ret < 0) { @@ -389,18 +444,10 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) if (ret < 0) goto cleanup_sockopt; - ret = nf_register_hooks(ipv6_conntrack_ops, - ARRAY_SIZE(ipv6_conntrack_ops)); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't register pre-routing defrag " - "hook.\n"); - goto cleanup_pernet; - } - ret = nf_ct_l4proto_register(builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); if (ret < 0) - goto cleanup_hooks; + goto cleanup_pernet; ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); if (ret < 0) { @@ -411,8 +458,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) cleanup_l4proto: nf_ct_l4proto_unregister(builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); - cleanup_hooks: - nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); cleanup_pernet: unregister_pernet_subsys(&ipv6_net_ops); cleanup_sockopt: @@ -426,7 +471,6 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); nf_ct_l4proto_unregister(builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); - nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); nf_unregister_sockopt(&so_getorigdst6); } diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index f06b047..8e0bdd0 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -30,6 +30,8 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +static DEFINE_MUTEX(defrag6_mutex); + static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { @@ -87,6 +89,19 @@ static struct nf_hook_ops ipv6_defrag_ops[] = { }, }; +static void __net_exit defrag6_net_exit(struct net *net) +{ + if (net->nf.defrag_ipv6) { + nf_unregister_net_hooks(net, ipv6_defrag_ops, + ARRAY_SIZE(ipv6_defrag_ops)); + net->nf.defrag_ipv6 = false; + } +} + +static struct pernet_operations defrag6_net_ops = { + .exit = defrag6_net_exit, +}; + static int __init nf_defrag_init(void) { int ret = 0; @@ -96,9 +111,9 @@ static int __init nf_defrag_init(void) pr_err("nf_defrag_ipv6: can't initialize frag6.\n"); return ret; } - ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + ret = register_pernet_subsys(&defrag6_net_ops); if (ret < 0) { - pr_err("nf_defrag_ipv6: can't register hooks\n"); + pr_err("nf_defrag_ipv6: can't register pernet ops\n"); goto cleanup_frag6; } return ret; @@ -111,12 +126,31 @@ cleanup_frag6: static void __exit nf_defrag_fini(void) { - nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + unregister_pernet_subsys(&defrag6_net_ops); nf_ct_frag6_cleanup(); } -void nf_defrag_ipv6_enable(void) +int nf_defrag_ipv6_enable(struct net *net) { + int err = 0; + + might_sleep(); + + if (net->nf.defrag_ipv6) + return 0; + + mutex_lock(&defrag6_mutex); + if (net->nf.defrag_ipv6) + goto out_unlock; + + err = nf_register_net_hooks(net, ipv6_defrag_ops, + ARRAY_SIZE(ipv6_defrag_ops)); + if (err == 0) + net->nf.defrag_ipv6 = true; + + out_unlock: + mutex_unlock(&defrag6_mutex); + return err; } EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index d526bb5..c947aad 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -235,7 +235,7 @@ nft_fib6_select_ops(const struct nft_ctx *ctx, if (!tb[NFTA_FIB_RESULT]) return ERR_PTR(-EINVAL); - result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); switch (result) { case NFT_FIB_RESULT_OIF: diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index a2aff12..6c5b5b1 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -36,12 +36,19 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, nft_out(pkt)); } +static void +nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV6); +} + static struct nft_expr_type nft_masq_ipv6_type; static const struct nft_expr_ops nft_masq_ipv6_ops = { .type = &nft_masq_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)), .eval = nft_masq_ipv6_eval, .init = nft_masq_init, + .destroy = nft_masq_ipv6_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, }; @@ -78,5 +85,5 @@ module_init(nft_masq_ipv6_module_init); module_exit(nft_masq_ipv6_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq"); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index bfcd5af..f5ac080 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -39,12 +39,19 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt)); } +static void +nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + nf_ct_netns_put(ctx->net, NFPROTO_IPV6); +} + static struct nft_expr_type nft_redir_ipv6_type; static const struct nft_expr_ops nft_redir_ipv6_ops = { .type = &nft_redir_ipv6_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), .eval = nft_redir_ipv6_eval, .init = nft_redir_init, + .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, }; @@ -72,5 +79,5 @@ module_init(nft_redir_ipv6_module_init); module_exit(nft_redir_ipv6_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 44410d3..63729b4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -146,38 +146,38 @@ config NF_CONNTRACK_LABELS to connection tracking entries. It selected by the connlabel match. config NF_CT_PROTO_DCCP - tristate 'DCCP protocol connection tracking support' + bool 'DCCP protocol connection tracking support' depends on NETFILTER_ADVANCED - default IP_DCCP + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on DCCP connections. - If unsure, say 'N'. + If unsure, say Y. config NF_CT_PROTO_GRE tristate config NF_CT_PROTO_SCTP - tristate 'SCTP protocol connection tracking support' + bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED - default IP_SCTP + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + If unsure, say Y. config NF_CT_PROTO_UDPLITE - tristate 'UDP-Lite protocol connection tracking support' + bool 'UDP-Lite protocol connection tracking support' depends on NETFILTER_ADVANCED + default y help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on UDP-Lite connections. - To compile it as a module, choose M here. If unsure, say N. + If unsure, say Y. config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" @@ -384,17 +384,17 @@ config NF_NAT_NEEDED default y config NF_NAT_PROTO_DCCP - tristate + bool depends on NF_NAT && NF_CT_PROTO_DCCP default NF_NAT && NF_CT_PROTO_DCCP config NF_NAT_PROTO_UDPLITE - tristate + bool depends on NF_NAT && NF_CT_PROTO_UDPLITE default NF_NAT && NF_CT_PROTO_UDPLITE config NF_NAT_PROTO_SCTP - tristate + bool default NF_NAT && NF_CT_PROTO_SCTP depends on NF_NAT && NF_CT_PROTO_SCTP select LIBCRC32C @@ -551,6 +551,12 @@ config NFT_NAT This option adds the "nat" expression that you can use to perform typical Network Address Translation (NAT) packet transformations. +config NFT_OBJREF + tristate "Netfilter nf_tables stateful object reference module" + help + This option adds the "objref" expression that allows you to refer to + stateful objects, such as counters and quotas. + config NFT_QUEUE depends on NETFILTER_NETLINK_QUEUE tristate "Netfilter nf_tables queue module" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5bbf767..ca30d19 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -5,6 +5,9 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o +nf_conntrack-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o obj-$(CONFIG_NETFILTER) = netfilter.o @@ -16,11 +19,7 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o # connection tracking obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o -# SCTP protocol connection tracking -obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o -obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o -obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o @@ -45,6 +44,11 @@ obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o +# NAT protocols (nf_nat) +nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o +nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o +nf_nat-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o + # generic transport layer logging obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o @@ -54,11 +58,6 @@ obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o obj-$(CONFIG_NF_NAT) += nf_nat.o obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o -# NAT protocols (nf_nat) -obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o -obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o -obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o - # NAT helpers obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o @@ -89,6 +88,7 @@ obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o +obj-$(CONFIG_NFT_OBJREF) += nft_objref.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index de30e08..ce6adfa 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -102,17 +102,14 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) if (!entry) return -ENOMEM; - entry->orig_ops = reg; - entry->ops = *reg; - entry->next = NULL; + nf_hook_entry_init(entry, reg); mutex_lock(&nf_hook_mutex); /* Find the spot in the list */ - while ((p = nf_entry_dereference(*pp)) != NULL) { - if (reg->priority < p->orig_ops->priority) + for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { + if (reg->priority < nf_hook_entry_priority(p)) break; - pp = &p->next; } rcu_assign_pointer(entry->next, p); rcu_assign_pointer(*pp, entry); @@ -139,12 +136,11 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) return; mutex_lock(&nf_hook_mutex); - while ((p = nf_entry_dereference(*pp)) != NULL) { - if (p->orig_ops == reg) { + for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { + if (nf_hook_entry_ops(p) == reg) { rcu_assign_pointer(*pp, p->next); break; } - pp = &p->next; } mutex_unlock(&nf_hook_mutex); if (!p) { @@ -311,7 +307,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, int ret; do { - verdict = entry->ops.hook(entry->ops.priv, skb, state); + verdict = nf_hook_entry_hookfn(entry, skb, state); switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: entry = rcu_dereference(entry->next); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 038c2ba..3d02b0c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3260,7 +3260,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); - if (IS_ERR(svc) || svc == NULL) + if (IS_ERR_OR_NULL(svc)) goto out_err; /* Dump the destinations */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 01d3d89..4e1a98f 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -254,6 +254,54 @@ static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, return true; } +static inline bool decrement_ttl(struct netns_ipvs *ipvs, + int skb_af, + struct sk_buff *skb) +{ + struct net *net = ipvs->net; + +#ifdef CONFIG_IP_VS_IPV6 + if (skb_af == AF_INET6) { + struct dst_entry *dst = skb_dst(skb); + + /* check and decrement ttl */ + if (ipv6_hdr(skb)->hop_limit <= 1) { + /* Force OUTPUT device used as source address */ + skb->dev = dst->dev; + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0); + __IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); + + return false; + } + + /* don't propagate ttl change to cloned packets */ + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + return false; + + ipv6_hdr(skb)->hop_limit--; + } else +#endif + { + if (ip_hdr(skb)->ttl <= 1) { + /* Tell the sender its packet died... */ + __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); + icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); + return false; + } + + /* don't propagate ttl change to cloned packets */ + if (!skb_make_writable(skb, sizeof(struct iphdr))) + return false; + + /* Decrease ttl */ + ip_decrease_ttl(ip_hdr(skb)); + } + + return true; +} + /* Get route to destination or remote server */ static int __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, @@ -326,6 +374,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, return local; } + if (!decrement_ttl(ipvs, skb_af, skb)) + goto err_put; + if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { mtu = dst_mtu(&rt->dst); } else { @@ -473,6 +524,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, return local; } + if (!decrement_ttl(ipvs, skb_af, skb)) + goto err_put; + /* MTU checking */ if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) mtu = dst_mtu(&rt->dst); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9bd3464..2d6ee18 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -125,6 +125,54 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); +int nf_ct_netns_get(struct net *net, u8 nfproto) +{ + const struct nf_conntrack_l3proto *l3proto; + int ret; + + might_sleep(); + + ret = nf_ct_l3proto_try_module_get(nfproto); + if (ret < 0) + return ret; + + /* we already have a reference, can't fail */ + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nfproto); + rcu_read_unlock(); + + if (!l3proto->net_ns_get) + return 0; + + ret = l3proto->net_ns_get(net); + if (ret < 0) + nf_ct_l3proto_module_put(nfproto); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_netns_get); + +void nf_ct_netns_put(struct net *net, u8 nfproto) +{ + const struct nf_conntrack_l3proto *l3proto; + + might_sleep(); + + /* same as nf_conntrack_netns_get(), reference assumed */ + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nfproto); + rcu_read_unlock(); + + if (WARN_ON(!l3proto)) + return; + + if (l3proto->net_ns_put) + l3proto->net_ns_put(net); + + nf_ct_l3proto_module_put(nfproto); +} +EXPORT_SYMBOL_GPL(nf_ct_netns_put); + struct nf_conntrack_l4proto * nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) { @@ -190,20 +238,19 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); +#ifdef CONFIG_SYSCTL +extern unsigned int nf_conntrack_default_on; + int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - int ret; - - if (proto->init_net) { - ret = proto->init_net(net); - if (ret < 0) - return ret; - } + if (nf_conntrack_default_on == 0) + return 0; - return 0; + return proto->net_ns_get ? proto->net_ns_get(net) : 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); +#endif void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { @@ -224,6 +271,16 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { + /* + * nf_conntrack_default_on *might* have registered hooks. + * ->net_ns_put must cope with more puts() than get(), i.e. + * if nf_conntrack_default_on was 0 at time of + * nf_ct_l3proto_pernet_register invocation this net_ns_put() + * should be a noop. + */ + if (proto->net_ns_put) + proto->net_ns_put(net); + /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 073b047..b68ce6a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -9,7 +9,6 @@ * */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/sysctl.h> #include <linux/spinlock.h> @@ -384,17 +383,9 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }, }; -/* this module per-net specifics */ -static unsigned int dccp_net_id __read_mostly; -struct dccp_net { - struct nf_proto_net pn; - int dccp_loose; - unsigned int dccp_timeout[CT_DCCP_MAX + 1]; -}; - -static inline struct dccp_net *dccp_pernet(struct net *net) +static inline struct nf_dccp_net *dccp_pernet(struct net *net) { - return net_generic(net, dccp_net_id); + return &net->ct.nf_ct_proto.dccp; } static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -424,7 +415,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); - struct dccp_net *dn; + struct nf_dccp_net *dn; struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -719,7 +710,7 @@ static int dccp_nlattr_size(void) static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { - struct dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = dccp_pernet(net); unsigned int *timeouts = data; int i; @@ -820,7 +811,7 @@ static struct ctl_table dccp_sysctl_table[] = { #endif /* CONFIG_SYSCTL */ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, - struct dccp_net *dn) + struct nf_dccp_net *dn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -850,7 +841,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn, static int dccp_init_net(struct net *net, u_int16_t proto) { - struct dccp_net *dn = dccp_pernet(net); + struct nf_dccp_net *dn = dccp_pernet(net); struct nf_proto_net *pn = &dn->pn; if (!pn->users) { @@ -868,7 +859,7 @@ static int dccp_init_net(struct net *net, u_int16_t proto) return dccp_kmemdup_sysctl_table(net, pn, dn); } -static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_DCCP, .name = "dccp", @@ -898,11 +889,11 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &dccp_net_id, .init_net = dccp_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); -static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .l3proto = AF_INET6, .l4proto = IPPROTO_DCCP, .name = "dccp", @@ -932,56 +923,6 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .nla_policy = dccp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &dccp_net_id, .init_net = dccp_init_net, }; - -static struct nf_conntrack_l4proto *dccp_proto[] = { - &dccp_proto4, - &dccp_proto6, -}; - -static __net_init int dccp_net_init(struct net *net) -{ - return nf_ct_l4proto_pernet_register(net, dccp_proto, - ARRAY_SIZE(dccp_proto)); -} - -static __net_exit void dccp_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, dccp_proto, - ARRAY_SIZE(dccp_proto)); -} - -static struct pernet_operations dccp_net_ops = { - .init = dccp_net_init, - .exit = dccp_net_exit, - .id = &dccp_net_id, - .size = sizeof(struct dccp_net), -}; - -static int __init nf_conntrack_proto_dccp_init(void) -{ - int ret; - - ret = register_pernet_subsys(&dccp_net_ops); - if (ret < 0) - return ret; - ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto)); - if (ret < 0) - unregister_pernet_subsys(&dccp_net_ops); - return ret; -} - -static void __exit nf_conntrack_proto_dccp_fini(void) -{ - nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto)); - unregister_pernet_subsys(&dccp_net_ops); -} - -module_init(nf_conntrack_proto_dccp_init); -module_exit(nf_conntrack_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("DCCP connection tracking protocol helper"); -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp6); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index d096c2d..a0efde3 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -15,7 +15,6 @@ #include <linux/types.h> #include <linux/timer.h> #include <linux/netfilter.h> -#include <linux/module.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/sctp.h> @@ -144,15 +143,9 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { } }; -static unsigned int sctp_net_id __read_mostly; -struct sctp_net { - struct nf_proto_net pn; - unsigned int timeouts[SCTP_CONNTRACK_MAX]; -}; - -static inline struct sctp_net *sctp_pernet(struct net *net) +static inline struct nf_sctp_net *sctp_pernet(struct net *net) { - return net_generic(net, sctp_net_id); + return &net->ct.nf_ct_proto.sctp; } static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -600,7 +593,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = sctp_pernet(net); int i; /* set default SCTP timeouts. */ @@ -708,7 +701,7 @@ static struct ctl_table sctp_sysctl_table[] = { #endif static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct sctp_net *sn) + struct nf_sctp_net *sn) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -735,7 +728,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, static int sctp_init_net(struct net *net, u_int16_t proto) { - struct sctp_net *sn = sctp_pernet(net); + struct nf_sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; if (!pn->users) { @@ -748,7 +741,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto) return sctp_kmemdup_sysctl_table(pn, sn); } -static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -778,11 +771,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .nla_policy = sctp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &sctp_net_id, .init_net = sctp_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); -static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { +struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, .name = "sctp", @@ -812,57 +805,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #endif - .net_id = &sctp_net_id, .init_net = sctp_init_net, }; - -static struct nf_conntrack_l4proto *sctp_proto[] = { - &nf_conntrack_l4proto_sctp4, - &nf_conntrack_l4proto_sctp6, -}; - -static int sctp_net_init(struct net *net) -{ - return nf_ct_l4proto_pernet_register(net, sctp_proto, - ARRAY_SIZE(sctp_proto)); -} - -static void sctp_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, sctp_proto, - ARRAY_SIZE(sctp_proto)); -} - -static struct pernet_operations sctp_net_ops = { - .init = sctp_net_init, - .exit = sctp_net_exit, - .id = &sctp_net_id, - .size = sizeof(struct sctp_net), -}; - -static int __init nf_conntrack_proto_sctp_init(void) -{ - int ret; - - ret = register_pernet_subsys(&sctp_net_ops); - if (ret < 0) - return ret; - ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto)); - if (ret < 0) - unregister_pernet_subsys(&sctp_net_ops); - return ret; -} - -static void __exit nf_conntrack_proto_sctp_fini(void) -{ - nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto)); - unregister_pernet_subsys(&sctp_net_ops); -} - -module_init(nf_conntrack_proto_sctp_init); -module_exit(nf_conntrack_proto_sctp_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Kiran Kumar Immidi"); -MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); -MODULE_ALIAS("ip_conntrack_proto_sctp"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp6); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 7808604..c35f7bf 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -9,7 +9,6 @@ #include <linux/types.h> #include <linux/timer.h> -#include <linux/module.h> #include <linux/udp.h> #include <linux/seq_file.h> #include <linux/skbuff.h> @@ -24,26 +23,14 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> -enum udplite_conntrack { - UDPLITE_CT_UNREPLIED, - UDPLITE_CT_REPLIED, - UDPLITE_CT_MAX -}; - static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = { [UDPLITE_CT_UNREPLIED] = 30*HZ, [UDPLITE_CT_REPLIED] = 180*HZ, }; -static unsigned int udplite_net_id __read_mostly; -struct udplite_net { - struct nf_proto_net pn; - unsigned int timeouts[UDPLITE_CT_MAX]; -}; - -static inline struct udplite_net *udplite_pernet(struct net *net) +static inline struct nf_udplite_net *udplite_pernet(struct net *net) { - return net_generic(net, udplite_net_id); + return &net->ct.nf_ct_proto.udplite; } static bool udplite_pkt_to_tuple(const struct sk_buff *skb, @@ -178,7 +165,7 @@ static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { unsigned int *timeouts = data; - struct udplite_net *un = udplite_pernet(net); + struct nf_udplite_net *un = udplite_pernet(net); /* set default timeouts for UDPlite. */ timeouts[UDPLITE_CT_UNREPLIED] = un->timeouts[UDPLITE_CT_UNREPLIED]; @@ -237,7 +224,7 @@ static struct ctl_table udplite_sysctl_table[] = { #endif /* CONFIG_SYSCTL */ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, - struct udplite_net *un) + struct nf_udplite_net *un) { #ifdef CONFIG_SYSCTL if (pn->ctl_table) @@ -257,7 +244,7 @@ static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn, static int udplite_init_net(struct net *net, u_int16_t proto) { - struct udplite_net *un = udplite_pernet(net); + struct nf_udplite_net *un = udplite_pernet(net); struct nf_proto_net *pn = &un->pn; if (!pn->users) { @@ -270,7 +257,7 @@ static int udplite_init_net(struct net *net, u_int16_t proto) return udplite_kmemdup_sysctl_table(pn, un); } -static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, @@ -299,11 +286,11 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &udplite_net_id, .init_net = udplite_init_net, }; +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite4); -static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = +struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, @@ -332,54 +319,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .nla_policy = udplite_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ - .net_id = &udplite_net_id, .init_net = udplite_init_net, }; - -static struct nf_conntrack_l4proto *udplite_proto[] = { - &nf_conntrack_l4proto_udplite4, - &nf_conntrack_l4proto_udplite6, -}; - -static int udplite_net_init(struct net *net) -{ - return nf_ct_l4proto_pernet_register(net, udplite_proto, - ARRAY_SIZE(udplite_proto)); -} - -static void udplite_net_exit(struct net *net) -{ - nf_ct_l4proto_pernet_unregister(net, udplite_proto, - ARRAY_SIZE(udplite_proto)); -} - -static struct pernet_operations udplite_net_ops = { - .init = udplite_net_init, - .exit = udplite_net_exit, - .id = &udplite_net_id, - .size = sizeof(struct udplite_net), -}; - -static int __init nf_conntrack_proto_udplite_init(void) -{ - int ret; - - ret = register_pernet_subsys(&udplite_net_ops); - if (ret < 0) - return ret; - ret = nf_ct_l4proto_register(udplite_proto, ARRAY_SIZE(udplite_proto)); - if (ret < 0) - unregister_pernet_subsys(&udplite_net_ops); - return ret; -} - -static void __exit nf_conntrack_proto_udplite_exit(void) -{ - nf_ct_l4proto_unregister(udplite_proto, ARRAY_SIZE(udplite_proto)); - unregister_pernet_subsys(&udplite_net_ops); -} - -module_init(nf_conntrack_proto_udplite_init); -module_exit(nf_conntrack_proto_udplite_exit); - -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5f446cd..d009ae6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -452,6 +452,9 @@ static int log_invalid_proto_max __read_mostly = 255; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; +extern unsigned int nf_conntrack_default_on; +unsigned int nf_conntrack_default_on __read_mostly = 1; + static int nf_conntrack_hash_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -517,6 +520,13 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "nf_conntrack_default_on", + .data = &nf_conntrack_default_on, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 44ae986..c9d7f95 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -14,6 +14,29 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) +{ + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + + skb->dev = dev; + dev_queue_xmit(skb); +} + +void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif) +{ + struct net_device *dev; + + dev = dev_get_by_index_rcu(nft_net(pkt), oif); + if (!dev) { + kfree_skb(pkt->skb); + return; + } + + nf_do_netdev_egress(pkt->skb, dev); +} +EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress); + void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) { struct net_device *dev; @@ -24,14 +47,8 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) return; skb = skb_clone(pkt->skb, GFP_ATOMIC); - if (skb == NULL) - return; - - if (skb_mac_header_was_set(skb)) - skb_push(skb, skb->mac_len); - - skb->dev = dev; - dev_queue_xmit(skb); + if (skb) + nf_do_netdev_egress(skb, dev); } EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index ed9b808..dc61399 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -177,6 +177,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_packet_common); /* bridge and netdev logging families share this code. */ void nf_log_l2packet(struct net *net, u_int8_t pf, + __be16 protocol, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -184,7 +185,7 @@ void nf_log_l2packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - switch (eth_hdr(skb)->h_proto) { + switch (protocol) { case htons(ETH_P_IP): nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out, loginfo, "%s", prefix); diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c index 1f64594..350eb14 100644 --- a/net/netfilter/nf_log_netdev.c +++ b/net/netfilter/nf_log_netdev.c @@ -23,7 +23,8 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf, const struct nf_loginfo *loginfo, const char *prefix) { - nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix); + nf_log_l2packet(net, pf, skb->protocol, hooknum, skb, in, out, + loginfo, prefix); } static struct nf_logger nf_netdev_logger __read_mostly = { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5b9c884..94b14c5 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -682,6 +682,18 @@ int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) &nf_nat_l4proto_tcp); RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP], &nf_nat_l4proto_udp); +#ifdef CONFIG_NF_NAT_PROTO_DCCP + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP], + &nf_nat_l4proto_dccp); +#endif +#ifdef CONFIG_NF_NAT_PROTO_SCTP + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP], + &nf_nat_l4proto_sctp); +#endif +#ifdef CONFIG_NF_NAT_PROTO_UDPLITE + RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE], + &nf_nat_l4proto_udplite); +#endif mutex_unlock(&nf_nat_proto_mutex); RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto); diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index 15c47b2..269fcd5 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -10,8 +10,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> #include <linux/skbuff.h> #include <linux/dccp.h> @@ -73,7 +71,7 @@ dccp_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { +const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .l4proto = IPPROTO_DCCP, .manip_pkt = dccp_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -82,35 +80,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_dccp = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_dccp_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); -err1: - return err; -} - -static void __exit nf_nat_proto_dccp_fini(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp); - -} - -module_init(nf_nat_proto_dccp_init); -module_exit(nf_nat_proto_dccp_fini); - -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("DCCP NAT protocol helper"); -MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index cbc7ade..31d3586 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -7,9 +7,7 @@ */ #include <linux/types.h> -#include <linux/init.h> #include <linux/sctp.h> -#include <linux/module.h> #include <net/sctp/checksum.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -49,12 +47,15 @@ sctp_manip_pkt(struct sk_buff *skb, hdr->dest = tuple->dst.u.sctp.port; } - hdr->checksum = sctp_compute_cksum(skb, hdroff); + if (skb->ip_summed != CHECKSUM_PARTIAL) { + hdr->checksum = sctp_compute_cksum(skb, hdroff); + skb->ip_summed = CHECKSUM_NONE; + } return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { +const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .l4proto = IPPROTO_SCTP, .manip_pkt = sctp_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -63,34 +64,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_sctp = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_sctp_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); -err1: - return err; -} - -static void __exit nf_nat_proto_sctp_exit(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp); -} - -module_init(nf_nat_proto_sctp_init); -module_exit(nf_nat_proto_sctp_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SCTP NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 58340c97..366bfbf 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -8,11 +8,9 @@ */ #include <linux/types.h> -#include <linux/init.h> #include <linux/udp.h> #include <linux/netfilter.h> -#include <linux/module.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_l3proto.h> #include <net/netfilter/nf_nat_l4proto.h> @@ -64,7 +62,7 @@ udplite_manip_pkt(struct sk_buff *skb, return true; } -static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { +const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .l4proto = IPPROTO_UDPLITE, .manip_pkt = udplite_manip_pkt, .in_range = nf_nat_l4proto_in_range, @@ -73,34 +71,3 @@ static const struct nf_nat_l4proto nf_nat_l4proto_udplite = { .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; - -static int __init nf_nat_proto_udplite_init(void) -{ - int err; - - err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite); - if (err < 0) - goto err1; - err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite); - if (err < 0) - goto err2; - return 0; - -err2: - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); -err1: - return err; -} - -static void __exit nf_nat_proto_udplite_fini(void) -{ - nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite); - nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite); -} - -module_init(nf_nat_proto_udplite_init); -module_exit(nf_nat_proto_udplite_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("UDP-Lite NAT protocol helper"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 77cba9f..4a76624 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -185,7 +185,7 @@ static unsigned int nf_iterate(struct sk_buff *skb, do { repeat: - verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); + verdict = nf_hook_entry_hookfn((*entryp), skb, state); if (verdict != NF_ACCEPT) { if (verdict != NF_REPEAT) return verdict; @@ -200,7 +200,6 @@ repeat: void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct nf_hook_entry *hook_entry = entry->hook; - struct nf_hook_ops *elem = &hook_entry->ops; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; int err; @@ -209,7 +208,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) - verdict = elem->hook(elem->priv, skb, &entry->state); + verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(entry->state.pf); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e5194f6f..a019a87 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -22,6 +22,7 @@ #include <net/sock.h> static LIST_HEAD(nf_tables_expressions); +static LIST_HEAD(nf_tables_objects); /** * nft_register_afinfo - register nf_tables address family info @@ -110,12 +111,12 @@ static void nft_ctx_init(struct nft_ctx *ctx, ctx->seq = nlh->nlmsg_seq; } -static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, - u32 size) +static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, + int msg_type, u32 size, gfp_t gfp) { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL); + trans = kzalloc(sizeof(struct nft_trans) + size, gfp); if (trans == NULL) return NULL; @@ -125,6 +126,12 @@ static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type, return trans; } +static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, + int msg_type, u32 size) +{ + return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); +} + static void nft_trans_destroy(struct nft_trans *trans) { list_del(&trans->list); @@ -304,6 +311,38 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) return err; } +static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, + struct nft_object *obj) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_obj)); + if (trans == NULL) + return -ENOMEM; + + if (msg_type == NFT_MSG_NEWOBJ) + nft_activate_next(ctx->net, obj); + + nft_trans_obj(trans) = obj; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + +static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) +{ + int err; + + err = nft_trans_obj_add(ctx, NFT_MSG_DELOBJ, obj); + if (err < 0) + return err; + + nft_deactivate_next(ctx->net, obj); + ctx->table->use--; + + return err; +} + /* * Tables */ @@ -688,6 +727,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); + INIT_LIST_HEAD(&table->objects); table->flags = flags; nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); @@ -709,6 +749,7 @@ static int nft_flush_table(struct nft_ctx *ctx) { int err; struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; struct nft_set *set, *ns; list_for_each_entry(chain, &ctx->table->chains, list) { @@ -735,6 +776,12 @@ static int nft_flush_table(struct nft_ctx *ctx) goto out; } + list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { + err = nft_delobj(ctx, obj); + if (err < 0) + goto out; + } + list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { if (!nft_is_active_next(ctx->net, chain)) continue; @@ -2411,6 +2458,7 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, [NFTA_SET_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, + [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2462,6 +2510,7 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, } return ERR_PTR(-ENOENT); } +EXPORT_SYMBOL_GPL(nf_tables_set_lookup); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, const struct nlattr *nla, @@ -2480,6 +2529,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net, } return ERR_PTR(-ENOENT); } +EXPORT_SYMBOL_GPL(nf_tables_set_lookup_byid); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) @@ -2568,6 +2618,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen))) goto nla_put_failure; } + if (set->flags & NFT_SET_OBJECT && + nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) + goto nla_put_failure; if (set->timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, @@ -2797,7 +2850,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, unsigned int size; bool create; u64 timeout; - u32 ktype, dtype, flags, policy, gc_int; + u32 ktype, dtype, flags, policy, gc_int, objtype; struct nft_set_desc desc; unsigned char *udata; u16 udlen; @@ -2827,11 +2880,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL | NFT_SET_TIMEOUT | - NFT_SET_MAP | NFT_SET_EVAL)) + NFT_SET_MAP | NFT_SET_EVAL | + NFT_SET_OBJECT)) return -EINVAL; - /* Only one of both operations is supported */ - if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) == - (NFT_SET_MAP | NFT_SET_EVAL)) + /* Only one of these operations is supported */ + if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) == + (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; } @@ -2856,6 +2910,19 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } else if (flags & NFT_SET_MAP) return -EINVAL; + if (nla[NFTA_SET_OBJ_TYPE] != NULL) { + if (!(flags & NFT_SET_OBJECT)) + return -EINVAL; + + objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); + if (objtype == NFT_OBJECT_UNSPEC || + objtype > NFT_OBJECT_MAX) + return -EINVAL; + } else if (flags & NFT_SET_OBJECT) + return -EINVAL; + else + objtype = NFT_OBJECT_UNSPEC; + timeout = 0; if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) @@ -2943,6 +3010,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, set->ktype = ktype; set->klen = desc.klen; set->dtype = dtype; + set->objtype = objtype; set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; @@ -3064,6 +3132,7 @@ bind: list_add_tail_rcu(&binding->list, &set->bindings); return 0; } +EXPORT_SYMBOL_GPL(nf_tables_bind_set); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) @@ -3074,6 +3143,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, nft_is_active(ctx->net, set)) nf_tables_set_destroy(ctx, set); } +EXPORT_SYMBOL_GPL(nf_tables_unbind_set); const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_KEY] = { @@ -3085,6 +3155,10 @@ const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_EXPR] = { .align = __alignof__(struct nft_expr), }, + [NFT_SET_EXT_OBJREF] = { + .len = sizeof(struct nft_object *), + .align = __alignof__(struct nft_object *), + }, [NFT_SET_EXT_FLAGS] = { .len = sizeof(u8), .align = __alignof__(u8), @@ -3173,6 +3247,11 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0) goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && + nla_put_string(skb, NFTA_SET_ELEM_OBJREF, + (*nft_set_ext_obj(ext))->name) < 0) + goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(*nft_set_ext_flags(ext)))) @@ -3467,7 +3546,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_data_uninit(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); - + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) + (*nft_set_ext_obj(ext))->use--; kfree(elem); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -3492,11 +3572,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + u8 genmask = nft_genmask_next(ctx->net); struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_object *obj = NULL; struct nft_userdata *udata; struct nft_data data; enum nft_registers dreg; @@ -3559,6 +3641,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); } + if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { + if (!(set->flags & NFT_SET_OBJECT)) { + err = -EINVAL; + goto err2; + } + obj = nf_tables_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + set->objtype, genmask); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err2; + } + nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF); + } + if (nla[NFTA_SET_ELEM_DATA] != NULL) { err = nft_data_init(ctx, &data, sizeof(data), &d2, nla[NFTA_SET_ELEM_DATA]); @@ -3617,6 +3713,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, udata->len = ulen - 1; nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); } + if (obj) { + *nft_set_ext_obj(ext) = obj; + obj->use++; + } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) @@ -3626,10 +3726,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = set->ops->insert(ctx->net, set, &elem, &ext2); if (err) { if (err == -EEXIST) { - if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && - nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && - memcmp(nft_set_ext_data(ext), - nft_set_ext_data(ext2), set->dlen) != 0) + if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && + memcmp(nft_set_ext_data(ext), + nft_set_ext_data(ext2), set->dlen) != 0) || + (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) && + *nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2))) err = -EBUSY; else if (!(nlmsg_flags & NLM_F_EXCL)) err = 0; @@ -3779,6 +3882,34 @@ err1: return err; } +static int nft_flush_set(const struct nft_ctx *ctx, + const struct nft_set *set, + const struct nft_set_iter *iter, + const struct nft_set_elem *elem) +{ + struct nft_trans *trans; + int err; + + trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, + sizeof(struct nft_trans_elem), GFP_ATOMIC); + if (!trans) + return -ENOMEM; + + if (!set->ops->deactivate_one(ctx->net, set, elem->priv)) { + err = -ENOENT; + goto err1; + } + + nft_trans_elem_set(trans) = (struct nft_set *)set; + nft_trans_elem(trans) = *((struct nft_set_elem *)elem); + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +err1: + kfree(trans); + return err; +} + static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -3789,9 +3920,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct nft_ctx ctx; int rem, err = 0; - if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) - return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -3803,6 +3931,18 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY; + if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) { + struct nft_set_dump_args args = { + .iter = { + .genmask = genmask, + .fn = nft_flush_set, + }, + }; + set->ops->walk(&ctx, set, &args.iter); + + return args.iter.err; + } + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_del_setelem(&ctx, set, attr); if (err < 0) @@ -3838,6 +3978,500 @@ struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, } EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); +/* + * Stateful objects + */ + +/** + * nft_register_obj- register nf_tables stateful object type + * @obj: object type + * + * Registers the object type for use with nf_tables. Returns zero on + * success or a negative errno code otherwise. + */ +int nft_register_obj(struct nft_object_type *obj_type) +{ + if (obj_type->type == NFT_OBJECT_UNSPEC) + return -EINVAL; + + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_add_rcu(&obj_type->list, &nf_tables_objects); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + return 0; +} +EXPORT_SYMBOL_GPL(nft_register_obj); + +/** + * nft_unregister_obj - unregister nf_tables object type + * @obj: object type + * + * Unregisters the object type for use with nf_tables. + */ +void nft_unregister_obj(struct nft_object_type *obj_type) +{ + nfnl_lock(NFNL_SUBSYS_NFTABLES); + list_del_rcu(&obj_type->list); + nfnl_unlock(NFNL_SUBSYS_NFTABLES); +} +EXPORT_SYMBOL_GPL(nft_unregister_obj); + +struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, + u32 objtype, u8 genmask) +{ + struct nft_object *obj; + + list_for_each_entry(obj, &table->objects, list) { + if (!nla_strcmp(nla, obj->name) && + objtype == obj->type->type && + nft_active_genmask(obj, genmask)) + return obj; + } + return ERR_PTR(-ENOENT); +} +EXPORT_SYMBOL_GPL(nf_tables_obj_lookup); + +static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { + [NFTA_OBJ_TABLE] = { .type = NLA_STRING }, + [NFTA_OBJ_NAME] = { .type = NLA_STRING }, + [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, + [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, +}; + +static struct nft_object *nft_obj_init(const struct nft_object_type *type, + const struct nlattr *attr) +{ + struct nlattr *tb[type->maxattr + 1]; + struct nft_object *obj; + int err; + + if (attr) { + err = nla_parse_nested(tb, type->maxattr, attr, type->policy); + if (err < 0) + goto err1; + } else { + memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1)); + } + + err = -ENOMEM; + obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL); + if (obj == NULL) + goto err1; + + err = type->init((const struct nlattr * const *)tb, obj); + if (err < 0) + goto err2; + + obj->type = type; + return obj; +err2: + kfree(obj); +err1: + return ERR_PTR(err); +} + +static int nft_object_dump(struct sk_buff *skb, unsigned int attr, + struct nft_object *obj, bool reset) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, attr); + if (!nest) + goto nla_put_failure; + if (obj->type->dump(skb, obj, reset) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + +static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +{ + const struct nft_object_type *type; + + list_for_each_entry(type, &nf_tables_objects, list) { + if (objtype == type->type) + return type; + } + return NULL; +} + +static const struct nft_object_type *nft_obj_type_get(u32 objtype) +{ + const struct nft_object_type *type; + + type = __nft_obj_type_get(objtype); + if (type != NULL && try_module_get(type->owner)) + return type; + +#ifdef CONFIG_MODULES + if (type == NULL) { + nfnl_unlock(NFNL_SUBSYS_NFTABLES); + request_module("nft-obj-%u", objtype); + nfnl_lock(NFNL_SUBSYS_NFTABLES); + if (__nft_obj_type_get(objtype)) + return ERR_PTR(-EAGAIN); + } +#endif + return ERR_PTR(-ENOENT); +} + +static int nf_tables_newobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nft_object_type *type; + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; + u32 objtype; + int err; + + if (!nla[NFTA_OBJ_TYPE] || + !nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_DATA]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + if (err != -ENOENT) + return err; + + obj = NULL; + } + + if (obj != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + return 0; + } + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + type = nft_obj_type_get(objtype); + if (IS_ERR(type)) + return PTR_ERR(type); + + obj = nft_obj_init(type, nla[NFTA_OBJ_DATA]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err1; + } + obj->table = table; + nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN); + + err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); + if (err < 0) + goto err2; + + list_add_tail_rcu(&obj->list, &table->objects); + table->use++; + return 0; +err2: + if (obj->type->destroy) + obj->type->destroy(obj); + kfree(obj); +err1: + module_put(type->owner); + return err; +} + +static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, + u32 portid, u32 seq, int event, u32 flags, + int family, const struct nft_table *table, + struct nft_object *obj, bool reset) +{ + struct nfgenmsg *nfmsg; + struct nlmsghdr *nlh; + + event |= NFNL_SUBSYS_NFTABLES << 8; + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags); + if (nlh == NULL) + goto nla_put_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = family; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(net->nft.base_seq & 0xffff); + + if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || + nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) || + nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || + nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_trim(skb, nlh); + return -1; +} + +struct nft_obj_filter { + char table[NFT_OBJ_MAXNAMELEN]; + u32 type; +}; + +static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_af_info *afi; + const struct nft_table *table; + unsigned int idx = 0, s_idx = cb->args[0]; + struct nft_obj_filter *filter = cb->data; + struct net *net = sock_net(skb->sk); + int family = nfmsg->nfgen_family; + struct nft_object *obj; + bool reset = false; + + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + + rcu_read_lock(); + cb->seq = net->nft.base_seq; + + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { + if (family != NFPROTO_UNSPEC && family != afi->family) + continue; + + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(obj, &table->objects, list) { + if (!nft_is_active(net, obj)) + goto cont; + if (idx < s_idx) + goto cont; + if (idx > s_idx) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + if (filter->table[0] && + strcmp(filter->table, table->name)) + goto cont; + if (filter->type != NFT_OBJECT_UNSPEC && + obj->type->type != filter->type) + goto cont; + + if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFT_MSG_NEWOBJ, + NLM_F_MULTI | NLM_F_APPEND, + afi->family, table, obj, reset) < 0) + goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } + } +done: + rcu_read_unlock(); + + cb->args[0] = idx; + return skb->len; +} + +static int nf_tables_dump_obj_done(struct netlink_callback *cb) +{ + kfree(cb->data); + + return 0; +} + +static struct nft_obj_filter * +nft_obj_filter_alloc(const struct nlattr * const nla[]) +{ + struct nft_obj_filter *filter; + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); + + if (nla[NFTA_OBJ_TABLE]) + nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE], + NFT_TABLE_MAXNAMELEN); + if (nla[NFTA_OBJ_TYPE]) + filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + + return filter; +} + +static int nf_tables_getobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); + int family = nfmsg->nfgen_family; + const struct nft_af_info *afi; + const struct nft_table *table; + struct nft_object *obj; + struct sk_buff *skb2; + bool reset = false; + u32 objtype; + int err; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_obj, + .done = nf_tables_dump_obj_done, + }; + + if (nla[NFTA_OBJ_TABLE] || + nla[NFTA_OBJ_TYPE]) { + struct nft_obj_filter *filter; + + filter = nft_obj_filter_alloc(nla); + if (IS_ERR(filter)) + return -ENOMEM; + + c.data = filter; + } + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_OBJ_NAME] || + !nla[NFTA_OBJ_TYPE]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, false); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + return -ENOMEM; + + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + reset = true; + + err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) + goto err; + + return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid); +err: + kfree_skb(skb2); + return err; + + return 0; +} + +static void nft_obj_destroy(struct nft_object *obj) +{ + if (obj->type->destroy) + obj->type->destroy(obj); + + module_put(obj->type->owner); + kfree(obj); +} + +static int nf_tables_delobj(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[]) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_object *obj; + struct nft_ctx ctx; + u32 objtype; + + if (!nla[NFTA_OBJ_TYPE] || + !nla[NFTA_OBJ_NAME]) + return -EINVAL; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_OBJ_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); + obj = nf_tables_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + if (IS_ERR(obj)) + return PTR_ERR(obj); + if (obj->use > 0) + return -EBUSY; + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); + + return nft_delobj(&ctx, obj); +} + +int nft_obj_notify(struct net *net, struct nft_table *table, + struct nft_object *obj, u32 portid, u32 seq, int event, + int family, int report, gfp_t gfp) +{ + struct sk_buff *skb; + int err; + + if (!report && + !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) + return 0; + + err = -ENOBUFS; + skb = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (skb == NULL) + goto err; + + err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family, + table, obj, false); + if (err < 0) { + kfree_skb(skb); + goto err; + } + + err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); +err: + if (err < 0) { + nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); + } + return err; +} +EXPORT_SYMBOL_GPL(nft_obj_notify); + +static int nf_tables_obj_notify(const struct nft_ctx *ctx, + struct nft_object *obj, int event) +{ + return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, + ctx->seq, event, ctx->afi->family, ctx->report, + GFP_KERNEL); +} + static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { @@ -3998,6 +4632,26 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { [NFT_MSG_GETGEN] = { .call = nf_tables_getgen, }, + [NFT_MSG_NEWOBJ] = { + .call_batch = nf_tables_newobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_GETOBJ] = { + .call = nf_tables_getobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_DELOBJ] = { + .call_batch = nf_tables_delobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, + [NFT_MSG_GETOBJ_RESET] = { + .call = nf_tables_getobj, + .attr_count = NFTA_OBJ_MAX, + .policy = nft_obj_policy, + }, }; static void nft_chain_commit_update(struct nft_trans *trans) @@ -4040,6 +4694,9 @@ static void nf_tables_commit_release(struct nft_trans *trans) nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem(trans).priv, true); break; + case NFT_MSG_DELOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; } kfree(trans); } @@ -4147,6 +4804,17 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) atomic_dec(&te->set->nelems); te->set->ndeact--; break; + case NFT_MSG_NEWOBJ: + nft_clear(net, nft_trans_obj(trans)); + nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + NFT_MSG_NEWOBJ); + nft_trans_destroy(trans); + break; + case NFT_MSG_DELOBJ: + list_del_rcu(&nft_trans_obj(trans)->list); + nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + NFT_MSG_DELOBJ); + break; } } @@ -4181,6 +4849,9 @@ static void nf_tables_abort_release(struct nft_trans *trans) nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem(trans).priv, true); break; + case NFT_MSG_NEWOBJ: + nft_obj_destroy(nft_trans_obj(trans)); + break; } kfree(trans); } @@ -4261,6 +4932,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; + case NFT_MSG_NEWOBJ: + trans->ctx.table->use--; + list_del_rcu(&nft_trans_obj(trans)->list); + break; + case NFT_MSG_DELOBJ: + trans->ctx.table->use++; + nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_trans_destroy(trans); + break; } } @@ -4807,6 +5487,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) { struct nft_table *table, *nt; struct nft_chain *chain, *nc; + struct nft_object *obj, *ne; struct nft_rule *rule, *nr; struct nft_set *set, *ns; struct nft_ctx ctx = { @@ -4833,6 +5514,11 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) table->use--; nft_set_destroy(set); } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + list_del(&obj->list); + table->use--; + nft_obj_destroy(obj); + } list_for_each_entry_safe(chain, nc, &table->chains, list) { list_del(&chain->list); table->use--; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 763cb4d..200922b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -1152,6 +1152,7 @@ MODULE_ALIAS_NF_LOGGER(AF_INET, 1); MODULE_ALIAS_NF_LOGGER(AF_INET6, 1); MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1); MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */ +MODULE_ALIAS_NF_LOGGER(5, 1); /* NFPROTO_NETDEV */ module_init(nfnetlink_log_init); module_exit(nfnetlink_log_fini); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 77db835..f6a02c5 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -31,11 +31,10 @@ struct nft_counter_percpu_priv { struct nft_counter_percpu __percpu *counter; }; -static void nft_counter_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); struct nft_counter_percpu *this_cpu; local_bh_disable(); @@ -47,10 +46,64 @@ static void nft_counter_eval(const struct nft_expr *expr, local_bh_enable(); } -static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter, +static inline void nft_counter_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + nft_counter_do_eval(priv, regs, pkt); +} + +static int nft_counter_do_init(const struct nlattr * const tb[], + struct nft_counter_percpu_priv *priv) +{ + struct nft_counter_percpu __percpu *cpu_stats; + struct nft_counter_percpu *this_cpu; + + cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu); + if (cpu_stats == NULL) + return -ENOMEM; + + preempt_disable(); + this_cpu = this_cpu_ptr(cpu_stats); + if (tb[NFTA_COUNTER_PACKETS]) { + this_cpu->counter.packets = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + } + if (tb[NFTA_COUNTER_BYTES]) { + this_cpu->counter.bytes = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + } + preempt_enable(); + priv->counter = cpu_stats; + return 0; +} + +static int nft_counter_obj_init(const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + return nft_counter_do_init(tb, priv); +} + +static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv) +{ + free_percpu(priv->counter); +} + +static void nft_counter_obj_destroy(struct nft_object *obj) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + nft_counter_do_destroy(priv); +} + +static void nft_counter_fetch(struct nft_counter_percpu __percpu *counter, struct nft_counter *total) { - const struct nft_counter_percpu *cpu_stats; + struct nft_counter_percpu *cpu_stats; u64 bytes, packets; unsigned int seq; int cpu; @@ -69,12 +122,52 @@ static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter, } } -static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +static u64 __nft_counter_reset(u64 *counter) +{ + u64 ret, old; + + do { + old = *counter; + ret = cmpxchg64(counter, old, 0); + } while (ret != old); + + return ret; +} + +static void nft_counter_reset(struct nft_counter_percpu __percpu *counter, + struct nft_counter *total) +{ + struct nft_counter_percpu *cpu_stats; + u64 bytes, packets; + unsigned int seq; + int cpu; + + memset(total, 0, sizeof(*total)); + for_each_possible_cpu(cpu) { + bytes = packets = 0; + + cpu_stats = per_cpu_ptr(counter, cpu); + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + packets += __nft_counter_reset(&cpu_stats->counter.packets); + bytes += __nft_counter_reset(&cpu_stats->counter.bytes); + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + + total->packets += packets; + total->bytes += bytes; + } +} + +static int nft_counter_do_dump(struct sk_buff *skb, + const struct nft_counter_percpu_priv *priv, + bool reset) { - struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); struct nft_counter total; - nft_counter_fetch(priv->counter, &total); + if (reset) + nft_counter_reset(priv->counter, &total); + else + nft_counter_fetch(priv->counter, &total); if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes), NFTA_COUNTER_PAD) || @@ -87,36 +180,54 @@ nla_put_failure: return -1; } +static int nft_counter_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + struct nft_counter_percpu_priv *priv = nft_obj_data(obj); + + return nft_counter_do_dump(skb, priv, reset); +} + static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 }, [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; +static struct nft_object_type nft_counter_obj __read_mostly = { + .type = NFT_OBJECT_COUNTER, + .size = sizeof(struct nft_counter_percpu_priv), + .maxattr = NFTA_COUNTER_MAX, + .policy = nft_counter_policy, + .eval = nft_counter_obj_eval, + .init = nft_counter_obj_init, + .destroy = nft_counter_obj_destroy, + .dump = nft_counter_obj_dump, + .owner = THIS_MODULE, +}; + +static void nft_counter_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + + nft_counter_do_eval(priv, regs, pkt); +} + +static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + + return nft_counter_do_dump(skb, priv, false); +} + static int nft_counter_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - struct nft_counter_percpu __percpu *cpu_stats; - struct nft_counter_percpu *this_cpu; - cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu); - if (cpu_stats == NULL) - return -ENOMEM; - - preempt_disable(); - this_cpu = this_cpu_ptr(cpu_stats); - if (tb[NFTA_COUNTER_PACKETS]) { - this_cpu->counter.packets = - be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); - } - if (tb[NFTA_COUNTER_BYTES]) { - this_cpu->counter.bytes = - be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); - } - preempt_enable(); - priv->counter = cpu_stats; - return 0; + return nft_counter_do_init(tb, priv); } static void nft_counter_destroy(const struct nft_ctx *ctx, @@ -124,7 +235,7 @@ static void nft_counter_destroy(const struct nft_ctx *ctx, { struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - free_percpu(priv->counter); + nft_counter_do_destroy(priv); } static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) @@ -174,12 +285,26 @@ static struct nft_expr_type nft_counter_type __read_mostly = { static int __init nft_counter_module_init(void) { - return nft_register_expr(&nft_counter_type); + int err; + + err = nft_register_obj(&nft_counter_obj); + if (err < 0) + return err; + + err = nft_register_expr(&nft_counter_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_obj(&nft_counter_obj); + return err; } static void __exit nft_counter_module_exit(void) { nft_unregister_expr(&nft_counter_type); + nft_unregister_obj(&nft_counter_obj); } module_init(nft_counter_module_init); @@ -188,3 +313,4 @@ module_exit(nft_counter_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("counter"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 6837348..e6baeae 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -208,37 +208,37 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_l3proto_try_module_get(uint8_t family) +static int nft_ct_netns_get(struct net *net, uint8_t family) { int err; if (family == NFPROTO_INET) { - err = nf_ct_l3proto_try_module_get(NFPROTO_IPV4); + err = nf_ct_netns_get(net, NFPROTO_IPV4); if (err < 0) goto err1; - err = nf_ct_l3proto_try_module_get(NFPROTO_IPV6); + err = nf_ct_netns_get(net, NFPROTO_IPV6); if (err < 0) goto err2; } else { - err = nf_ct_l3proto_try_module_get(family); + err = nf_ct_netns_get(net, family); if (err < 0) goto err1; } return 0; err2: - nf_ct_l3proto_module_put(NFPROTO_IPV4); + nf_ct_netns_put(net, NFPROTO_IPV4); err1: return err; } -static void nft_ct_l3proto_module_put(uint8_t family) +static void nft_ct_netns_put(struct net *net, uint8_t family) { if (family == NFPROTO_INET) { - nf_ct_l3proto_module_put(NFPROTO_IPV4); - nf_ct_l3proto_module_put(NFPROTO_IPV6); + nf_ct_netns_put(net, NFPROTO_IPV4); + nf_ct_netns_put(net, NFPROTO_IPV6); } else - nf_ct_l3proto_module_put(family); + nf_ct_netns_put(net, family); } static int nft_ct_get_init(const struct nft_ctx *ctx, @@ -342,7 +342,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_ct_l3proto_try_module_get(ctx->afi->family); + err = nft_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) return err; @@ -390,7 +390,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nft_ct_l3proto_try_module_get(ctx->afi->family); + err = nft_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) goto err1; @@ -405,7 +405,7 @@ err1: static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { - nft_ct_l3proto_module_put(ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->afi->family); } static void nft_ct_set_destroy(const struct nft_ctx *ctx, @@ -423,7 +423,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, break; } - nft_ct_l3proto_module_put(ctx->afi->family); + nft_ct_netns_put(ctx->net, ctx->afi->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 249c9b8..29a4906 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -86,7 +86,7 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0) return -EINVAL; - priv->result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT])); + priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); switch (priv->result) { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 763ebc3..ce13a50 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -26,8 +26,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; - nf_dup_netdev_egress(pkt, oif); - regs->verdict.code = NF_DROP; + nf_fwd_netdev_egress(pkt, oif); + regs->verdict.code = NF_STOLEN; } static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 81b5ad6..11ce016 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -77,7 +77,7 @@ int nft_masq_init(const struct nft_ctx *ctx, } } - return 0; + return nf_ct_netns_get(ctx->net, ctx->afi->family); } EXPORT_SYMBOL_GPL(nft_masq_init); @@ -105,4 +105,4 @@ nla_put_failure: EXPORT_SYMBOL_GPL(nft_masq_dump); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index ee2d717..19a7bf3 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -209,7 +209,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - return 0; + return nf_ct_netns_get(ctx->net, family); } static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -257,12 +257,21 @@ nla_put_failure: return -1; } +static void +nft_nat_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + const struct nft_nat *priv = nft_expr_priv(expr); + + nf_ct_netns_put(ctx->net, priv->family); +} + static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)), .eval = nft_nat_eval, .init = nft_nat_init, + .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, }; diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c new file mode 100644 index 0000000..415a65b --- /dev/null +++ b/net/netfilter/nft_objref.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2012-2016 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr)) + +static void nft_objref_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->type->eval(obj, regs, pkt); +} + +static int nft_objref_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_object *obj = nft_objref_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + u32 objtype; + + if (!tb[NFTA_OBJREF_IMM_NAME] || + !tb[NFTA_OBJREF_IMM_TYPE]) + return -EINVAL; + + objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); + obj = nf_tables_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, + genmask); + if (IS_ERR(obj)) + return -ENOENT; + + nft_objref_priv(expr) = obj; + obj->use++; + + return 0; +} + +static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_object *obj = nft_objref_priv(expr); + + if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) || + nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static void nft_objref_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->use--; +} + +static struct nft_expr_type nft_objref_type; +static const struct nft_expr_ops nft_objref_ops = { + .type = &nft_objref_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), + .eval = nft_objref_eval, + .init = nft_objref_init, + .destroy = nft_objref_destroy, + .dump = nft_objref_dump, +}; + +struct nft_objref_map { + struct nft_set *set; + enum nft_registers sreg:8; + struct nft_set_binding binding; +}; + +static void nft_objref_map_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); + const struct nft_set *set = priv->set; + const struct nft_set_ext *ext; + struct nft_object *obj; + bool found; + + found = set->ops->lookup(nft_net(pkt), set, ®s->data[priv->sreg], + &ext); + if (!found) { + regs->verdict.code = NFT_BREAK; + return; + } + obj = *nft_set_ext_obj(ext); + obj->type->eval(obj, regs, pkt); +} + +static int nft_objref_map_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); + struct nft_set *set; + int err; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_OBJREF_SET_NAME], genmask); + if (IS_ERR(set)) { + if (tb[NFTA_OBJREF_SET_ID]) { + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_OBJREF_SET_ID], + genmask); + } + if (IS_ERR(set)) + return PTR_ERR(set); + } + + if (!(set->flags & NFT_SET_OBJECT)) + return -EINVAL; + + priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]); + err = nft_validate_register_load(priv->sreg, set->klen); + if (err < 0) + return err; + + priv->binding.flags = set->flags & NFT_SET_OBJECT; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + return err; + + priv->set = set; + return 0; +} + +static int nft_objref_map_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_objref_map *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_OBJREF_SET_SREG, priv->sreg) || + nla_put_string(skb, NFTA_OBJREF_SET_NAME, priv->set->name)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static void nft_objref_map_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(ctx, priv->set, &priv->binding); +} + +static struct nft_expr_type nft_objref_type; +static const struct nft_expr_ops nft_objref_map_ops = { + .type = &nft_objref_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), + .eval = nft_objref_map_eval, + .init = nft_objref_map_init, + .destroy = nft_objref_map_destroy, + .dump = nft_objref_map_dump, +}; + +static const struct nft_expr_ops * +nft_objref_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_OBJREF_SET_SREG] && + (tb[NFTA_OBJREF_SET_NAME] || + tb[NFTA_OBJREF_SET_ID])) + return &nft_objref_map_ops; + else if (tb[NFTA_OBJREF_IMM_NAME] && + tb[NFTA_OBJREF_IMM_TYPE]) + return &nft_objref_ops; + + return ERR_PTR(-EOPNOTSUPP); +} + +static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = { + [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING }, + [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 }, + [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 }, + [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING }, + [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 }, +}; + +static struct nft_expr_type nft_objref_type __read_mostly = { + .name = "objref", + .select_ops = nft_objref_select_ops, + .policy = nft_objref_policy, + .maxattr = NFTA_OBJREF_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_objref_module_init(void) +{ + return nft_register_expr(&nft_objref_type); +} + +static void __exit nft_objref_module_exit(void) +{ + nft_unregister_expr(&nft_objref_type); +} + +module_init(nft_objref_module_init); +module_exit(nft_objref_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_EXPR("objref"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 98fb5d7..36d2b10 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,6 +18,10 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +/* For layer 4 checksum field offset. */ +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/icmpv6.h> /* add vlan header into the user buffer for if tag was removed by offloads */ static bool @@ -164,6 +169,87 @@ const struct nft_expr_ops nft_payload_fast_ops = { .dump = nft_payload_dump, }; +static inline void nft_csum_replace(__sum16 *sum, __wsum fsum, __wsum tsum) +{ + *sum = csum_fold(csum_add(csum_sub(~csum_unfold(*sum), fsum), tsum)); + if (*sum == 0) + *sum = CSUM_MANGLED_0; +} + +static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff) +{ + struct udphdr *uh, _uh; + + uh = skb_header_pointer(skb, thoff, sizeof(_uh), &_uh); + if (!uh) + return false; + + return uh->check; +} + +static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, + struct sk_buff *skb, + unsigned int *l4csum_offset) +{ + switch (pkt->tprot) { + case IPPROTO_TCP: + *l4csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + if (!nft_payload_udp_checksum(skb, pkt->xt.thoff)) + return -1; + /* Fall through. */ + case IPPROTO_UDPLITE: + *l4csum_offset = offsetof(struct udphdr, check); + break; + case IPPROTO_ICMPV6: + *l4csum_offset = offsetof(struct icmp6hdr, icmp6_cksum); + break; + default: + return -1; + } + + *l4csum_offset += pkt->xt.thoff; + return 0; +} + +static int nft_payload_l4csum_update(const struct nft_pktinfo *pkt, + struct sk_buff *skb, + __wsum fsum, __wsum tsum) +{ + int l4csum_offset; + __sum16 sum; + + /* If we cannot determine layer 4 checksum offset or this packet doesn't + * require layer 4 checksum recalculation, skip this packet. + */ + if (nft_payload_l4csum_offset(pkt, skb, &l4csum_offset) < 0) + return 0; + + if (skb_copy_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0) + return -1; + + /* Checksum mangling for an arbitrary amount of bytes, based on + * inet_proto_csum_replace*() functions. + */ + if (skb->ip_summed != CHECKSUM_PARTIAL) { + nft_csum_replace(&sum, fsum, tsum); + if (skb->ip_summed == CHECKSUM_COMPLETE) { + skb->csum = ~csum_add(csum_sub(~(skb->csum), fsum), + tsum); + } + } else { + sum = ~csum_fold(csum_add(csum_sub(csum_unfold(sum), fsum), + tsum)); + } + + if (!skb_make_writable(skb, l4csum_offset + sizeof(sum)) || + skb_store_bits(skb, l4csum_offset, &sum, sizeof(sum)) < 0) + return -1; + + return 0; +} + static void nft_payload_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -204,14 +290,15 @@ static void nft_payload_set_eval(const struct nft_expr *expr, fsum = skb_checksum(skb, offset, priv->len, 0); tsum = csum_partial(src, priv->len, 0); - sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum), - tsum)); - if (sum == 0) - sum = CSUM_MANGLED_0; + nft_csum_replace(&sum, fsum, tsum); if (!skb_make_writable(skb, csum_offset + sizeof(sum)) || skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0) goto err; + + if (priv->csum_flags && + nft_payload_l4csum_update(pkt, skb, fsum, tsum) < 0) + goto err; } if (!skb_make_writable(skb, max(offset + priv->len, 0)) || @@ -240,6 +327,15 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) priv->csum_offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET])); + if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) { + u32 flags; + + flags = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_FLAGS])); + if (flags & ~NFT_PAYLOAD_L4CSUM_PSEUDOHDR) + return -EINVAL; + + priv->csum_flags = flags; + } switch (priv->csum_type) { case NFT_PAYLOAD_CSUM_NONE: @@ -262,7 +358,8 @@ static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) || nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) || nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET, - htonl(priv->csum_offset))) + htonl(priv->csum_offset)) || + nla_put_be32(skb, NFTA_PAYLOAD_CSUM_FLAGS, htonl(priv->csum_flags))) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index c00104c..bd6efc5 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -17,38 +17,59 @@ struct nft_quota { u64 quota; - bool invert; - atomic64_t remain; + unsigned long flags; + atomic64_t consumed; }; static inline bool nft_overquota(struct nft_quota *priv, - const struct nft_pktinfo *pkt) + const struct sk_buff *skb) { - return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0; + return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota; } -static void nft_quota_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static inline bool nft_quota_invert(struct nft_quota *priv) { - struct nft_quota *priv = nft_expr_priv(expr); + return priv->flags & NFT_QUOTA_F_INV; +} - if (nft_overquota(priv, pkt) ^ priv->invert) +static inline void nft_quota_do_eval(struct nft_quota *priv, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv)) regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, + [NFTA_QUOTA_CONSUMED] = { .type = NLA_U64 }, }; -static int nft_quota_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +#define NFT_QUOTA_DEPLETED_BIT 1 /* From NFT_QUOTA_F_DEPLETED. */ + +static void nft_quota_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_quota *priv = nft_expr_priv(expr); - u32 flags = 0; - u64 quota; + struct nft_quota *priv = nft_obj_data(obj); + bool overquota; + + overquota = nft_overquota(priv, pkt->skb); + if (overquota ^ nft_quota_invert(priv)) + regs->verdict.code = NFT_BREAK; + + if (overquota && + !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) + nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0, + NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC); +} + +static int nft_quota_do_init(const struct nlattr * const tb[], + struct nft_quota *priv) +{ + unsigned long flags = 0; + u64 quota, consumed = 0; if (!tb[NFTA_QUOTA_BYTES]) return -EINVAL; @@ -57,26 +78,60 @@ static int nft_quota_init(const struct nft_ctx *ctx, if (quota > S64_MAX) return -EOVERFLOW; + if (tb[NFTA_QUOTA_CONSUMED]) { + consumed = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_CONSUMED])); + if (consumed > quota) + return -EINVAL; + } + if (tb[NFTA_QUOTA_FLAGS]) { flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS])); if (flags & ~NFT_QUOTA_F_INV) return -EINVAL; + if (flags & NFT_QUOTA_F_DEPLETED) + return -EOPNOTSUPP; } priv->quota = quota; - priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false; - atomic64_set(&priv->remain, quota); + priv->flags = flags; + atomic64_set(&priv->consumed, consumed); return 0; } -static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_quota_obj_init(const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_quota *priv = nft_obj_data(obj); + + return nft_quota_do_init(tb, priv); +} + +static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, + bool reset) { - const struct nft_quota *priv = nft_expr_priv(expr); - u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; + u32 flags = priv->flags; + u64 consumed; + + if (reset) { + consumed = atomic64_xchg(&priv->consumed, 0); + if (test_and_clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags)) + flags |= NFT_QUOTA_F_DEPLETED; + } else { + consumed = atomic64_read(&priv->consumed); + } + + /* Since we inconditionally increment consumed quota for each packet + * that we see, don't go over the quota boundary in what we send to + * userspace. + */ + if (consumed > priv->quota) + consumed = priv->quota; if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), NFTA_QUOTA_PAD) || + nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed), + NFTA_QUOTA_PAD) || nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) goto nla_put_failure; return 0; @@ -85,6 +140,50 @@ nla_put_failure: return -1; } +static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj, + bool reset) +{ + struct nft_quota *priv = nft_obj_data(obj); + + return nft_quota_do_dump(skb, priv, reset); +} + +static struct nft_object_type nft_quota_obj __read_mostly = { + .type = NFT_OBJECT_QUOTA, + .size = sizeof(struct nft_quota), + .maxattr = NFTA_QUOTA_MAX, + .policy = nft_quota_policy, + .init = nft_quota_obj_init, + .eval = nft_quota_obj_eval, + .dump = nft_quota_obj_dump, + .owner = THIS_MODULE, +}; + +static void nft_quota_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + nft_quota_do_eval(priv, regs, pkt); +} + +static int nft_quota_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + return nft_quota_do_init(tb, priv); +} + +static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + return nft_quota_do_dump(skb, priv, false); +} + static struct nft_expr_type nft_quota_type; static const struct nft_expr_ops nft_quota_ops = { .type = &nft_quota_type, @@ -105,12 +204,26 @@ static struct nft_expr_type nft_quota_type __read_mostly = { static int __init nft_quota_module_init(void) { - return nft_register_expr(&nft_quota_type); + int err; + + err = nft_register_obj(&nft_quota_obj); + if (err < 0) + return err; + + err = nft_register_expr(&nft_quota_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_obj(&nft_quota_obj); + return err; } static void __exit nft_quota_module_exit(void) { - nft_unregister_expr(&nft_quota_type); + nft_unregister_expr(&nft_quota_type); + nft_unregister_obj(&nft_quota_obj); } module_init(nft_quota_module_init); @@ -119,3 +232,4 @@ module_exit(nft_quota_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_ALIAS_NFT_EXPR("quota"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_QUOTA); diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 03f7bf4..40dcd05 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com> + * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -79,7 +79,7 @@ int nft_redir_init(const struct nft_ctx *ctx, return -EINVAL; } - return 0; + return nf_ct_netns_get(ctx->net, ctx->afi->family); } EXPORT_SYMBOL_GPL(nft_redir_init); @@ -108,4 +108,4 @@ nla_put_failure: EXPORT_SYMBOL_GPL(nft_redir_dump); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); +MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>"); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index a3dface..1e20e2b 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -167,6 +167,19 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set, nft_set_elem_clear_busy(&he->ext); } +static bool nft_hash_deactivate_one(const struct net *net, + const struct nft_set *set, void *priv) +{ + struct nft_hash_elem *he = priv; + + if (!nft_set_elem_mark_busy(&he->ext) || + !nft_is_active(net, &he->ext)) { + nft_set_elem_change_active(net, set, &he->ext); + return true; + } + return false; +} + static void *nft_hash_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) @@ -181,13 +194,10 @@ static void *nft_hash_deactivate(const struct net *net, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) { - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) - nft_set_elem_change_active(net, set, &he->ext); - else - he = NULL; - } + if (he != NULL && + !nft_hash_deactivate_one(net, set, he)) + he = NULL; + rcu_read_unlock(); return he; @@ -387,6 +397,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, + .deactivate_one = nft_hash_deactivate_one, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .update = nft_hash_update, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 36493a7..08376e5 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -171,6 +171,15 @@ static void nft_rbtree_activate(const struct net *net, nft_set_elem_change_active(net, set, &rbe->ext); } +static bool nft_rbtree_deactivate_one(const struct net *net, + const struct nft_set *set, void *priv) +{ + struct nft_rbtree_elem *rbe = priv; + + nft_set_elem_change_active(net, set, &rbe->ext); + return true; +} + static void *nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) @@ -204,7 +213,7 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_right; continue; } - nft_set_elem_change_active(net, set, &rbe->ext); + nft_rbtree_deactivate_one(net, set, rbe); return rbe; } } @@ -295,6 +304,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .insert = nft_rbtree_insert, .remove = nft_rbtree_remove, .deactivate = nft_rbtree_deactivate, + .deactivate_one = nft_rbtree_deactivate_one, .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ad818e5..2ff4996 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) +#define XT_PCPU_BLOCK_SIZE 4096 struct compat_delta { unsigned int offset; /* offset in kernel */ @@ -958,7 +959,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (!info) { - info = vmalloc(sz); + info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | + __GFP_NORETRY | __GFP_HIGHMEM, + PAGE_KERNEL); if (!info) return NULL; } @@ -1615,6 +1618,59 @@ void xt_proto_fini(struct net *net, u_int8_t af) } EXPORT_SYMBOL_GPL(xt_proto_fini); +/** + * xt_percpu_counter_alloc - allocate x_tables rule counter + * + * @state: pointer to xt_percpu allocation state + * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct + * + * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then + * contain the address of the real (percpu) counter. + * + * Rule evaluation needs to use xt_get_this_cpu_counter() helper + * to fetch the real percpu counter. + * + * To speed up allocation and improve data locality, a 4kb block is + * allocated. + * + * xt_percpu_counter_alloc_state contains the base address of the + * allocated page and the current sub-offset. + * + * returns false on error. + */ +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter) +{ + BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2)); + + if (nr_cpu_ids <= 1) + return true; + + if (!state->mem) { + state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE, + XT_PCPU_BLOCK_SIZE); + if (!state->mem) + return false; + } + counter->pcnt = (__force unsigned long)(state->mem + state->off); + state->off += sizeof(*counter); + if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) { + state->mem = NULL; + state->off = 0; + } + return true; +} +EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc); + +void xt_percpu_counter_free(struct xt_counters *counters) +{ + unsigned long pcnt = counters->pcnt; + + if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0) + free_percpu((void __percpu *)pcnt); +} +EXPORT_SYMBOL_GPL(xt_percpu_counter_free); + static int __net_init xt_net_init(struct net *net) { int i; diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index e04dc28..da56c06 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -106,7 +106,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) return -EINVAL; } - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -115,7 +115,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target connsecmark_tg_reg __read_mostly = { diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 6669e68..95c7503 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -216,7 +216,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err1; #endif - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) goto err1; @@ -260,7 +260,7 @@ out: err3: nf_ct_tmpl_free(ct); err2: - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); err1: return ret; } @@ -341,7 +341,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, if (help) module_put(help->helper->me); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); xt_ct_destroy_timeout(ct); nf_ct_put(info->ct); diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c index 94d0b54..e45a012 100644 --- a/net/netfilter/xt_NETMAP.c +++ b/net/netfilter/xt_NETMAP.c @@ -60,7 +60,12 @@ static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) return -EINVAL; - return 0; + return nf_ct_netns_get(par->net, par->family); +} + +static void netmap_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } static unsigned int @@ -111,7 +116,7 @@ static int netmap_tg4_check(const struct xt_tgchk_param *par) pr_debug("bad rangesize %u.\n", mr->rangesize); return -EINVAL; } - return 0; + return nf_ct_netns_get(par->net, par->family); } static struct xt_target netmap_tg_reg[] __read_mostly = { @@ -127,6 +132,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = { (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg6_checkentry, + .destroy = netmap_tg_destroy, .me = THIS_MODULE, }, { @@ -141,6 +147,7 @@ static struct xt_target netmap_tg_reg[] __read_mostly = { (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), .checkentry = netmap_tg4_check, + .destroy = netmap_tg_destroy, .me = THIS_MODULE, }, }; diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 651dce6..98a4c6d 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -40,7 +40,13 @@ static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) if (range->flags & NF_NAT_RANGE_MAP_IPS) return -EINVAL; - return 0; + + return nf_ct_netns_get(par->net, par->family); +} + +static void redirect_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } /* FIXME: Take multiple ranges --RR */ @@ -56,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) pr_debug("bad rangesize %u.\n", mr->rangesize); return -EINVAL; } - return 0; + return nf_ct_netns_get(par->net, par->family); } static unsigned int @@ -72,6 +78,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = { .revision = 0, .table = "nat", .checkentry = redirect_tg6_checkentry, + .destroy = redirect_tg_destroy, .target = redirect_tg6, .targetsize = sizeof(struct nf_nat_range), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -85,6 +92,7 @@ static struct xt_target redirect_tg_reg[] __read_mostly = { .table = "nat", .target = redirect_tg4, .checkentry = redirect_tg4_check, + .destroy = redirect_tg_destroy, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index dbd72cc..80cb7ba 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -531,6 +531,11 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; + int err; + + err = nf_defrag_ipv6_enable(par->net); + if (err) + return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IP6T_INV_PROTO)) @@ -545,6 +550,11 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) static int tproxy_tg4_check(const struct xt_tgchk_param *par) { const struct ipt_ip *i = par->entryinfo; + int err; + + err = nf_defrag_ipv4_enable(par->net); + if (err) + return err; if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && !(i->invflags & IPT_INV_PROTO)) @@ -596,11 +606,6 @@ static struct xt_target tproxy_tg_reg[] __read_mostly = { static int __init tproxy_tg_init(void) { - nf_defrag_ipv4_enable(); -#ifdef XT_TPROXY_HAVE_IPV6 - nf_defrag_ipv6_enable(); -#endif - return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg)); } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index dffee9d47..2dedaa2 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/filter.h> +#include <linux/bpf.h> #include <linux/netfilter/xt_bpf.h> #include <linux/netfilter/x_tables.h> @@ -20,15 +21,15 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_bpf"); MODULE_ALIAS("ip6t_bpf"); -static int bpf_mt_check(const struct xt_mtchk_param *par) +static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, + struct bpf_prog **ret) { - struct xt_bpf_info *info = par->matchinfo; struct sock_fprog_kern program; - program.len = info->bpf_program_num_elem; - program.filter = info->bpf_program; + program.len = len; + program.filter = insns; - if (bpf_prog_create(&info->filter, &program)) { + if (bpf_prog_create(ret, &program)) { pr_info("bpf: check failed: parse error\n"); return -EINVAL; } @@ -36,6 +37,42 @@ static int bpf_mt_check(const struct xt_mtchk_param *par) return 0; } +static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) +{ + struct bpf_prog *prog; + + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + *ret = prog; + return 0; +} + +static int bpf_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info *info = par->matchinfo; + + return __bpf_mt_check_bytecode(info->bpf_program, + info->bpf_program_num_elem, + &info->filter); +} + +static int bpf_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_bpf_info_v1 *info = par->matchinfo; + + if (info->mode == XT_BPF_MODE_BYTECODE) + return __bpf_mt_check_bytecode(info->bpf_program, + info->bpf_program_num_elem, + &info->filter); + else if (info->mode == XT_BPF_MODE_FD_PINNED || + info->mode == XT_BPF_MODE_FD_ELF) + return __bpf_mt_check_fd(info->fd, &info->filter); + else + return -EINVAL; +} + static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; @@ -43,31 +80,58 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) return BPF_PROG_RUN(info->filter, skb); } +static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_bpf_info_v1 *info = par->matchinfo; + + return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb); +} + static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; + + bpf_prog_destroy(info->filter); +} + +static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + const struct xt_bpf_info_v1 *info = par->matchinfo; + bpf_prog_destroy(info->filter); } -static struct xt_match bpf_mt_reg __read_mostly = { - .name = "bpf", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = bpf_mt_check, - .match = bpf_mt, - .destroy = bpf_mt_destroy, - .matchsize = sizeof(struct xt_bpf_info), - .me = THIS_MODULE, +static struct xt_match bpf_mt_reg[] __read_mostly = { + { + .name = "bpf", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check, + .match = bpf_mt, + .destroy = bpf_mt_destroy, + .matchsize = sizeof(struct xt_bpf_info), + .me = THIS_MODULE, + }, + { + .name = "bpf", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = bpf_mt_check_v1, + .match = bpf_mt_v1, + .destroy = bpf_mt_destroy_v1, + .matchsize = sizeof(struct xt_bpf_info_v1), + .me = THIS_MODULE, + }, }; static int __init bpf_mt_init(void) { - return xt_register_match(&bpf_mt_reg); + return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } static void __exit bpf_mt_exit(void) { - xt_unregister_match(&bpf_mt_reg); + xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } module_init(bpf_mt_init); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index d4bec26..cad0b7b 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -110,7 +110,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) sinfo->direction != XT_CONNBYTES_DIR_BOTH) return -EINVAL; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -129,7 +129,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connbytes_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 03d66f1..7827128 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -61,7 +61,7 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -70,14 +70,14 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) ret = nf_connlabels_get(par->net, info->bit); if (ret < 0) - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); return ret; } static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) { nf_connlabels_put(par->net); - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match connlabels_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index bb38453..2aff2b7 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -368,7 +368,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for " "address family %u\n", par->family); @@ -378,7 +378,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) /* init private data */ info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); if (info->data == NULL) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); return -ENOMEM; } @@ -414,7 +414,7 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) const struct xt_connlimit_info *info = par->matchinfo; unsigned int i; - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) destroy_tree(&info->data->climit_root4[i]); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index b83e158..9935d50 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -77,7 +77,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -86,7 +86,7 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) static void connmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static bool @@ -107,7 +107,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -116,7 +116,7 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) static void connmark_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_target connmark_tg_reg __read_mostly = { diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 2dea15e..c0fb217 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -271,7 +271,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -280,7 +280,7 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match conntrack_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index f679dd4..38a7815 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -59,7 +59,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) struct xt_helper_info *info = par->matchinfo; int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -71,7 +71,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) static void helper_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match helper_mt_reg __read_mostly = { diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index ec06fb1..1cde0e4 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -44,12 +44,18 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, switch (minfo->flags) { case XT_MULTIPORT_SOURCE: - return (src >= s && src <= e) ^ minfo->invert; + if (src >= s && src <= e) + return true ^ minfo->invert; + break; case XT_MULTIPORT_DESTINATION: - return (dst >= s && dst <= e) ^ minfo->invert; + if (dst >= s && dst <= e) + return true ^ minfo->invert; + break; case XT_MULTIPORT_EITHER: - return ((dst >= s && dst <= e) || - (src >= s && src <= e)) ^ minfo->invert; + if ((dst >= s && dst <= e) || + (src >= s && src <= e)) + return true ^ minfo->invert; + break; default: break; } @@ -59,11 +65,17 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, switch (minfo->flags) { case XT_MULTIPORT_SOURCE: - return (src == s) ^ minfo->invert; + if (src == s) + return true ^ minfo->invert; + break; case XT_MULTIPORT_DESTINATION: - return (dst == s) ^ minfo->invert; + if (dst == s) + return true ^ minfo->invert; + break; case XT_MULTIPORT_EITHER: - return (src == s || dst == s) ^ minfo->invert; + if (src == s || dst == s) + return true ^ minfo->invert; + break; default: break; } diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index bea7464c..8107b3e 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -23,7 +23,17 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) par->target->name); return -EINVAL; } - return 0; + return nf_ct_netns_get(par->net, par->family); +} + +static int xt_nat_checkentry(const struct xt_tgchk_param *par) +{ + return nf_ct_netns_get(par->net, par->family); +} + +static void xt_nat_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_netns_put(par->net, par->family); } static void xt_nat_convert_range(struct nf_nat_range *dst, @@ -106,6 +116,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { .name = "SNAT", .revision = 0, .checkentry = xt_nat_checkentry_v0, + .destroy = xt_nat_destroy, .target = xt_snat_target_v0, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .family = NFPROTO_IPV4, @@ -118,6 +129,7 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { .name = "DNAT", .revision = 0, .checkentry = xt_nat_checkentry_v0, + .destroy = xt_nat_destroy, .target = xt_dnat_target_v0, .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .family = NFPROTO_IPV4, @@ -129,6 +141,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { { .name = "SNAT", .revision = 1, + .checkentry = xt_nat_checkentry, + .destroy = xt_nat_destroy, .target = xt_snat_target_v1, .targetsize = sizeof(struct nf_nat_range), .table = "nat", @@ -139,6 +153,8 @@ static struct xt_target xt_nat_target_reg[] __read_mostly = { { .name = "DNAT", .revision = 1, + .checkentry = xt_nat_checkentry, + .destroy = xt_nat_destroy, .target = xt_dnat_target_v1, .targetsize = sizeof(struct nf_nat_range), .table = "nat", diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 2198914..770bbec 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -147,9 +147,28 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_enable_defrag(struct net *net, int family) +{ + switch (family) { + case NFPROTO_IPV4: + return nf_defrag_ipv4_enable(net); +#ifdef XT_SOCKET_HAVE_IPV6 + case NFPROTO_IPV6: + return nf_defrag_ipv6_enable(net); +#endif + } + WARN_ONCE(1, "Unknown family %d\n", family); + return 0; +} + static int socket_mt_v1_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + int err; + + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); @@ -161,6 +180,11 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par) static int socket_mt_v2_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + int err; + + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); @@ -173,7 +197,11 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo3 *info = (struct xt_socket_mtinfo3 *)par->matchinfo; + int err; + err = socket_mt_enable_defrag(par->net, par->family); + if (err) + return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V3); @@ -268,11 +296,6 @@ static struct xt_match socket_mt_reg[] __read_mostly = { static int __init socket_mt_init(void) { - nf_defrag_ipv4_enable(); -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - nf_defrag_ipv6_enable(); -#endif - return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index a507922..5746a33 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -43,7 +43,7 @@ static int state_mt_check(const struct xt_mtchk_param *par) { int ret; - ret = nf_ct_l3proto_try_module_get(par->family); + ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) pr_info("cannot load conntrack support for proto=%u\n", par->family); @@ -52,7 +52,7 @@ static int state_mt_check(const struct xt_mtchk_param *par) static void state_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(par->family); + nf_ct_netns_put(par->net, par->family); } static struct xt_match state_mt_reg __read_mostly = { |