summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/net/netfilter/nf_conntrack.h2
-rw-r--r--include/net/netfilter/nf_tables.h30
-rw-r--r--include/net/netfilter/nf_tables_ipv6.h6
-rw-r--r--net/bridge/br_netfilter_hooks.c12
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c5
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nft_redir_ipv4.c8
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c8
-rw-r--r--net/ipv6/netfilter/nft_redir_ipv6.c8
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c13
-rw-r--r--net/netfilter/nf_tables_api.c4
-rw-r--r--net/netfilter/nft_ct.c21
-rw-r--r--net/netfilter/nft_meta.c40
-rw-r--r--net/netfilter/nft_nat.c8
-rw-r--r--net/netfilter/nft_set_bitmap.c165
17 files changed, 194 insertions, 154 deletions
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f540f9a..1960587 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -244,7 +244,7 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
u32 seq);
/* Fake conntrack entry for untracked connections */
-DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+DECLARE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
static inline struct nf_conn *nf_ct_untracked_get(void)
{
return raw_cpu_ptr(&nf_conntrack_untracked);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2aa8a9d..01360286 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -103,6 +103,35 @@ struct nft_regs {
};
};
+/* Store/load an u16 or u8 integer to/from the u32 data register.
+ *
+ * Note, when using concatenations, register allocation happens at 32-bit
+ * level. So for store instruction, pad the rest part with zero to avoid
+ * garbage values.
+ */
+
+static inline void nft_reg_store16(u32 *dreg, u16 val)
+{
+ *dreg = 0;
+ *(u16 *)dreg = val;
+}
+
+static inline void nft_reg_store8(u32 *dreg, u8 val)
+{
+ *dreg = 0;
+ *(u8 *)dreg = val;
+}
+
+static inline u16 nft_reg_load16(u32 *sreg)
+{
+ return *(u16 *)sreg;
+}
+
+static inline u8 nft_reg_load8(u32 *sreg)
+{
+ return *(u8 *)sreg;
+}
+
static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
unsigned int len)
{
@@ -203,7 +232,6 @@ struct nft_set_elem {
struct nft_set;
struct nft_set_iter {
u8 genmask;
- bool flush;
unsigned int count;
unsigned int skip;
int err;
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index d150b50..97983d1 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -9,12 +9,13 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ unsigned int flags = IP6_FH_F_AUTH;
int protohdr, thoff = 0;
unsigned short frag_off;
nft_set_pktinfo(pkt, skb, state);
- protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+ protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
if (protohdr < 0) {
nft_set_pktinfo_proto_unspec(pkt, skb);
return;
@@ -32,6 +33,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_IPV6)
+ unsigned int flags = IP6_FH_F_AUTH;
struct ipv6hdr *ip6h, _ip6h;
unsigned int thoff = 0;
unsigned short frag_off;
@@ -50,7 +52,7 @@ __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
if (pkt_len + sizeof(*ip6h) > skb->len)
return -1;
- protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
+ protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
if (protohdr < 0)
return -1;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index fa87fbd..1f1e620 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -706,18 +706,20 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge;
- unsigned int mtu_reserved;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ unsigned int mtu, mtu_reserved;
mtu_reserved = nf_bridge_mtu_reduction(skb);
+ mtu = skb->dev->mtu;
+
+ if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
+ mtu = nf_bridge->frag_max_size;
- if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
+ if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
}
- nf_bridge = nf_bridge_info_get(skb);
-
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index bc1486f..2e14ed1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -165,6 +165,10 @@ static unsigned int ipv4_conntrack_local(void *priv,
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
+
+ if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */
+ return NF_ACCEPT;
+
return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index f8aad03..6f5e8d0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -255,11 +255,6 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
/* maniptype == SRC for postrouting. */
enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
- /* We never see fragments: conntrack defrags on pre-routing
- * and local-out, and nf_nat_out protects post-routing.
- */
- NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
* have dropped it. Hence it's the user's responsibilty to
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index a0ea8aa..f186772 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -26,10 +26,10 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
if (priv->sreg_proto_min) {
- range.min_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_min];
- range.max_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_max];
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
}
regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
&range, nft_out(pkt));
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index 1650ed2..5120be1 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -26,10 +26,10 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
memset(&mr, 0, sizeof(mr));
if (priv->sreg_proto_min) {
- mr.range[0].min.all =
- *(__be16 *)&regs->data[priv->sreg_proto_min];
- mr.range[0].max.all =
- *(__be16 *)&regs->data[priv->sreg_proto_max];
+ mr.range[0].min.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ mr.range[0].max.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 6c5b5b1..4146536 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -27,10 +27,10 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
if (priv->sreg_proto_min) {
- range.min_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_min];
- range.max_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_max];
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
}
regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
nft_out(pkt));
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index f5ac080..a27e424 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -26,10 +26,10 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
memset(&range, 0, sizeof(range));
if (priv->sreg_proto_min) {
- range.min_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_min],
- range.max_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_max],
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 071b97f..ffb78e5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -181,7 +181,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
seqcount_t nf_conntrack_generation __read_mostly;
-DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+/* nf_conn must be 8 bytes aligned, as the 3 LSB bits are used
+ * for the nfctinfo. We cheat by (ab)using the PER CPU cache line
+ * alignment to enforce this.
+ */
+DEFINE_PER_CPU_ALIGNED(struct nf_conn, nf_conntrack_untracked);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
static unsigned int nf_conntrack_hash_rnd __read_mostly;
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 31d3586..804e8a0 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -33,8 +33,16 @@ sctp_manip_pkt(struct sk_buff *skb,
enum nf_nat_manip_type maniptype)
{
sctp_sctphdr_t *hdr;
+ int hdrsize = 8;
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ /* This could be an inner header returned in imcp packet; in such
+ * cases we cannot update the checksum field since it is outside
+ * of the 8 bytes of transport layer headers we are guaranteed.
+ */
+ if (skb->len >= hdroff + sizeof(*hdr))
+ hdrsize = sizeof(*hdr);
+
+ if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
hdr = (struct sctphdr *)(skb->data + hdroff);
@@ -47,6 +55,9 @@ sctp_manip_pkt(struct sk_buff *skb,
hdr->dest = tuple->dst.u.sctp.port;
}
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
if (skb->ip_summed != CHECKSUM_PARTIAL) {
hdr->checksum = sctp_compute_cksum(skb, hdroff);
skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5e0ccfd..434c739 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3145,7 +3145,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
iter.count = 0;
iter.err = 0;
iter.fn = nf_tables_bind_check_setelem;
- iter.flush = false;
set->ops->walk(ctx, set, &iter);
if (iter.err < 0)
@@ -3399,7 +3398,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
args.iter.count = 0;
args.iter.err = 0;
args.iter.fn = nf_tables_dump_setelem;
- args.iter.flush = false;
set->ops->walk(&ctx, set, &args.iter);
nla_nest_end(skb, nest);
@@ -3963,7 +3961,6 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
struct nft_set_iter iter = {
.genmask = genmask,
.fn = nft_flush_set,
- .flush = true,
};
set->ops->walk(&ctx, set, &iter);
@@ -5114,7 +5111,6 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
iter.count = 0;
iter.err = 0;
iter.fn = nf_tables_loop_check_setelem;
- iter.flush = false;
set->ops->walk(ctx, set, &iter);
if (iter.err < 0)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bf548a7..0264258 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -83,7 +83,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_CT_DIRECTION:
- *dest = CTINFO2DIR(ctinfo);
+ nft_reg_store8(dest, CTINFO2DIR(ctinfo));
return;
case NFT_CT_STATUS:
*dest = ct->status;
@@ -151,20 +151,22 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
return;
}
case NFT_CT_L3PROTOCOL:
- *dest = nf_ct_l3num(ct);
+ nft_reg_store8(dest, nf_ct_l3num(ct));
return;
case NFT_CT_PROTOCOL:
- *dest = nf_ct_protonum(ct);
+ nft_reg_store8(dest, nf_ct_protonum(ct));
return;
#ifdef CONFIG_NF_CONNTRACK_ZONES
case NFT_CT_ZONE: {
const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+ u16 zoneid;
if (priv->dir < IP_CT_DIR_MAX)
- *dest = nf_ct_zone_id(zone, priv->dir);
+ zoneid = nf_ct_zone_id(zone, priv->dir);
else
- *dest = zone->id;
+ zoneid = zone->id;
+ nft_reg_store16(dest, zoneid);
return;
}
#endif
@@ -183,10 +185,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
case NFT_CT_PROTO_SRC:
- *dest = (__force __u16)tuple->src.u.all;
+ nft_reg_store16(dest, (__force u16)tuple->src.u.all);
return;
case NFT_CT_PROTO_DST:
- *dest = (__force __u16)tuple->dst.u.all;
+ nft_reg_store16(dest, (__force u16)tuple->dst.u.all);
return;
default:
break;
@@ -205,7 +207,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
const struct nft_ct *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
enum ip_conntrack_info ctinfo;
- u16 value = regs->data[priv->sreg];
+ u16 value = nft_reg_load16(&regs->data[priv->sreg]);
struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
@@ -542,7 +544,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
case IP_CT_DIR_REPLY:
break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ goto err1;
}
}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e1f5ca9..7b60e01 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -45,16 +45,15 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*dest = skb->len;
break;
case NFT_META_PROTOCOL:
- *dest = 0;
- *(__be16 *)dest = skb->protocol;
+ nft_reg_store16(dest, (__force u16)skb->protocol);
break;
case NFT_META_NFPROTO:
- *dest = nft_pf(pkt);
+ nft_reg_store8(dest, nft_pf(pkt));
break;
case NFT_META_L4PROTO:
if (!pkt->tprot_set)
goto err;
- *dest = pkt->tprot;
+ nft_reg_store8(dest, pkt->tprot);
break;
case NFT_META_PRIORITY:
*dest = skb->priority;
@@ -85,14 +84,12 @@ void nft_meta_get_eval(const struct nft_expr *expr,
case NFT_META_IIFTYPE:
if (in == NULL)
goto err;
- *dest = 0;
- *(u16 *)dest = in->type;
+ nft_reg_store16(dest, in->type);
break;
case NFT_META_OIFTYPE:
if (out == NULL)
goto err;
- *dest = 0;
- *(u16 *)dest = out->type;
+ nft_reg_store16(dest, out->type);
break;
case NFT_META_SKUID:
sk = skb_to_full_sk(skb);
@@ -142,19 +139,19 @@ void nft_meta_get_eval(const struct nft_expr *expr,
#endif
case NFT_META_PKTTYPE:
if (skb->pkt_type != PACKET_LOOPBACK) {
- *dest = skb->pkt_type;
+ nft_reg_store8(dest, skb->pkt_type);
break;
}
switch (nft_pf(pkt)) {
case NFPROTO_IPV4:
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
- *dest = PACKET_MULTICAST;
+ nft_reg_store8(dest, PACKET_MULTICAST);
else
- *dest = PACKET_BROADCAST;
+ nft_reg_store8(dest, PACKET_BROADCAST);
break;
case NFPROTO_IPV6:
- *dest = PACKET_MULTICAST;
+ nft_reg_store8(dest, PACKET_MULTICAST);
break;
case NFPROTO_NETDEV:
switch (skb->protocol) {
@@ -168,14 +165,14 @@ void nft_meta_get_eval(const struct nft_expr *expr,
goto err;
if (ipv4_is_multicast(iph->daddr))
- *dest = PACKET_MULTICAST;
+ nft_reg_store8(dest, PACKET_MULTICAST);
else
- *dest = PACKET_BROADCAST;
+ nft_reg_store8(dest, PACKET_BROADCAST);
break;
}
case htons(ETH_P_IPV6):
- *dest = PACKET_MULTICAST;
+ nft_reg_store8(dest, PACKET_MULTICAST);
break;
default:
WARN_ON_ONCE(1);
@@ -230,7 +227,9 @@ void nft_meta_set_eval(const struct nft_expr *expr,
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
- u32 value = regs->data[meta->sreg];
+ u32 *sreg = &regs->data[meta->sreg];
+ u32 value = *sreg;
+ u8 pkt_type;
switch (meta->key) {
case NFT_META_MARK:
@@ -240,9 +239,12 @@ void nft_meta_set_eval(const struct nft_expr *expr,
skb->priority = value;
break;
case NFT_META_PKTTYPE:
- if (skb->pkt_type != value &&
- skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type))
- skb->pkt_type = value;
+ pkt_type = nft_reg_load8(sreg);
+
+ if (skb->pkt_type != pkt_type &&
+ skb_pkt_type_ok(pkt_type) &&
+ skb_pkt_type_ok(skb->pkt_type))
+ skb->pkt_type = pkt_type;
break;
case NFT_META_NFTRACE:
skb->nf_trace = !!value;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 19a7bf3..439e0bd 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -65,10 +65,10 @@ static void nft_nat_eval(const struct nft_expr *expr,
}
if (priv->sreg_proto_min) {
- range.min_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_min];
- range.max_proto.all =
- *(__be16 *)&regs->data[priv->sreg_proto_max];
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 152d226..8ebbc29 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -15,6 +15,11 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+struct nft_bitmap_elem {
+ struct list_head head;
+ struct nft_set_ext ext;
+};
+
/* This bitmap uses two bits to represent one element. These two bits determine
* the element state in the current and the future generation.
*
@@ -41,13 +46,22 @@
* restore its previous state.
*/
struct nft_bitmap {
- u16 bitmap_size;
- u8 bitmap[];
+ struct list_head list;
+ u16 bitmap_size;
+ u8 bitmap[];
};
-static inline void nft_bitmap_location(u32 key, u32 *idx, u32 *off)
+static inline void nft_bitmap_location(const struct nft_set *set,
+ const void *key,
+ u32 *idx, u32 *off)
{
- u32 k = (key << 1);
+ u32 k;
+
+ if (set->klen == 2)
+ k = *(u16 *)key;
+ else
+ k = *(u8 *)key;
+ k <<= 1;
*idx = k / BITS_PER_BYTE;
*off = k % BITS_PER_BYTE;
@@ -69,26 +83,48 @@ static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_cur(net);
u32 idx, off;
- nft_bitmap_location(*key, &idx, &off);
+ nft_bitmap_location(set, key, &idx, &off);
return nft_bitmap_active(priv->bitmap, idx, off, genmask);
}
+static struct nft_bitmap_elem *
+nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
+ u8 genmask)
+{
+ const struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *be;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (memcmp(nft_set_ext_key(&be->ext),
+ nft_set_ext_key(&this->ext), set->klen) ||
+ !nft_set_elem_active(&be->ext, genmask))
+ continue;
+
+ return be;
+ }
+ return NULL;
+}
+
static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
- struct nft_set_ext **_ext)
+ struct nft_set_ext **ext)
{
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_set_ext *ext = elem->priv;
+ struct nft_bitmap_elem *new = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
- nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
- if (nft_bitmap_active(priv->bitmap, idx, off, genmask))
+ be = nft_bitmap_elem_find(set, new, genmask);
+ if (be) {
+ *ext = &be->ext;
return -EEXIST;
+ }
+ nft_bitmap_location(set, nft_set_ext_key(&new->ext), &idx, &off);
/* Enter 01 state. */
priv->bitmap[idx] |= (genmask << off);
+ list_add_tail_rcu(&new->head, &priv->list);
return 0;
}
@@ -98,13 +134,14 @@ static void nft_bitmap_remove(const struct net *net,
const struct nft_set_elem *elem)
{
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_set_ext *ext = elem->priv;
+ struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
- nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 00 state. */
priv->bitmap[idx] &= ~(genmask << off);
+ list_del_rcu(&be->head);
}
static void nft_bitmap_activate(const struct net *net,
@@ -112,74 +149,52 @@ static void nft_bitmap_activate(const struct net *net,
const struct nft_set_elem *elem)
{
struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_set_ext *ext = elem->priv;
+ struct nft_bitmap_elem *be = elem->priv;
u8 genmask = nft_genmask_next(net);
u32 idx, off;
- nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 11 state. */
priv->bitmap[idx] |= (genmask << off);
+ nft_set_elem_change_active(net, set, &be->ext);
}
static bool nft_bitmap_flush(const struct net *net,
- const struct nft_set *set, void *ext)
+ const struct nft_set *set, void *_be)
{
struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
+ struct nft_bitmap_elem *be = _be;
u32 idx, off;
- nft_bitmap_location(nft_set_ext_key(ext)->data[0], &idx, &off);
+ nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
/* Enter 10 state, similar to deactivation. */
priv->bitmap[idx] &= ~(genmask << off);
+ nft_set_elem_change_active(net, set, &be->ext);
return true;
}
-static struct nft_set_ext *nft_bitmap_ext_alloc(const struct nft_set *set,
- const struct nft_set_elem *elem)
-{
- struct nft_set_ext_tmpl tmpl;
- struct nft_set_ext *ext;
-
- nft_set_ext_prepare(&tmpl);
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
-
- ext = kzalloc(tmpl.len, GFP_KERNEL);
- if (!ext)
- return NULL;
-
- nft_set_ext_init(ext, &tmpl);
- memcpy(nft_set_ext_key(ext), elem->key.val.data, set->klen);
-
- return ext;
-}
-
static void *nft_bitmap_deactivate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_bitmap *priv = nft_set_priv(set);
+ struct nft_bitmap_elem *this = elem->priv, *be;
u8 genmask = nft_genmask_next(net);
- struct nft_set_ext *ext;
- u32 idx, off, key = 0;
-
- memcpy(&key, elem->key.val.data, set->klen);
- nft_bitmap_location(key, &idx, &off);
+ u32 idx, off;
- if (!nft_bitmap_active(priv->bitmap, idx, off, genmask))
- return NULL;
+ nft_bitmap_location(set, elem->key.val.data, &idx, &off);
- /* We have no real set extension since this is a bitmap, allocate this
- * dummy object that is released from the commit/abort path.
- */
- ext = nft_bitmap_ext_alloc(set, elem);
- if (!ext)
+ be = nft_bitmap_elem_find(set, this, genmask);
+ if (!be)
return NULL;
/* Enter 10 state. */
priv->bitmap[idx] &= ~(genmask << off);
+ nft_set_elem_change_active(net, set, &be->ext);
- return ext;
+ return be;
}
static void nft_bitmap_walk(const struct nft_ctx *ctx,
@@ -187,47 +202,23 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
struct nft_set_iter *iter)
{
const struct nft_bitmap *priv = nft_set_priv(set);
- struct nft_set_ext_tmpl tmpl;
+ struct nft_bitmap_elem *be;
struct nft_set_elem elem;
- struct nft_set_ext *ext;
- int idx, off;
- u16 key;
-
- nft_set_ext_prepare(&tmpl);
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
-
- for (idx = 0; idx < priv->bitmap_size; idx++) {
- for (off = 0; off < BITS_PER_BYTE; off += 2) {
- if (iter->count < iter->skip)
- goto cont;
-
- if (!nft_bitmap_active(priv->bitmap, idx, off,
- iter->genmask))
- goto cont;
-
- ext = kzalloc(tmpl.len, GFP_KERNEL);
- if (!ext) {
- iter->err = -ENOMEM;
- return;
- }
- nft_set_ext_init(ext, &tmpl);
- key = ((idx * BITS_PER_BYTE) + off) >> 1;
- memcpy(nft_set_ext_key(ext), &key, set->klen);
-
- elem.priv = ext;
- iter->err = iter->fn(ctx, set, iter, &elem);
-
- /* On set flush, this dummy extension object is released
- * from the commit/abort path.
- */
- if (!iter->flush)
- kfree(ext);
-
- if (iter->err < 0)
- return;
+
+ list_for_each_entry_rcu(be, &priv->list, head) {
+ if (iter->count < iter->skip)
+ goto cont;
+ if (!nft_set_elem_active(&be->ext, iter->genmask))
+ goto cont;
+
+ elem.priv = be;
+
+ iter->err = iter->fn(ctx, set, iter, &elem);
+
+ if (iter->err < 0)
+ return;
cont:
- iter->count++;
- }
+ iter->count++;
}
}
@@ -258,6 +249,7 @@ static int nft_bitmap_init(const struct nft_set *set,
{
struct nft_bitmap *priv = nft_set_priv(set);
+ INIT_LIST_HEAD(&priv->list);
priv->bitmap_size = nft_bitmap_size(set->klen);
return 0;
@@ -283,6 +275,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
static struct nft_set_ops nft_bitmap_ops __read_mostly = {
.privsize = nft_bitmap_privsize,
+ .elemsize = offsetof(struct nft_bitmap_elem, ext),
.estimate = nft_bitmap_estimate,
.init = nft_bitmap_init,
.destroy = nft_bitmap_destroy,
OpenPOWER on IntegriCloud