diff options
Diffstat (limited to 'net')
73 files changed, 1102 insertions, 481 deletions
diff --git a/net/Kconfig b/net/Kconfig index 6272420..99815b5 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -6,7 +6,7 @@ menuconfig NET bool "Networking support" select NLATTR select GENERIC_NET_UTILS - select ANON_INODES + select BPF ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 992ec49..44cb786 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -112,6 +112,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(br_deliver); /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1bada53..1a4f32c 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -192,7 +192,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) static int br_parse_ip_options(struct sk_buff *skb) { - struct ip_options *opt; const struct iphdr *iph; struct net_device *dev = skb->dev; u32 len; @@ -201,7 +200,6 @@ static int br_parse_ip_options(struct sk_buff *skb) goto inhdr_error; iph = ip_hdr(skb); - opt = &(IPCB(skb)->opt); /* Basic sanity checks */ if (iph->ihl < 5 || iph->version != 4) @@ -227,23 +225,11 @@ static int br_parse_ip_options(struct sk_buff *skb) } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (iph->ihl == 5) - return 0; - - opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(dev_net(dev), opt, skb)) - goto inhdr_error; - - /* Check correct handling of SRR option */ - if (unlikely(opt->srr)) { - struct in_device *in_dev = __in_dev_get_rcu(dev); - if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) - goto drop; - - if (ip_options_rcv_srr(skb)) - goto drop; - } - + /* We should really parse IP options here but until + * somebody who actually uses IP options complains to + * us we'll just silently ignore the options because + * we're lazy! + */ return 0; inhdr_error: diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index da17a5e..074c557 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -75,9 +75,11 @@ static const struct nf_chain_type filter_bridge = { .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_BRIDGE, .owner = THIS_MODULE, - .hook_mask = (1 << NF_BR_LOCAL_IN) | + .hook_mask = (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | - (1 << NF_BR_LOCAL_OUT), + (1 << NF_BR_LOCAL_OUT) | + (1 << NF_BR_POST_ROUTING), }; static int __init nf_tables_bridge_init(void) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index a764795..48da2c5 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -16,6 +16,239 @@ #include <net/netfilter/nft_reject.h> #include <net/netfilter/ipv4/nf_reject.h> #include <net/netfilter/ipv6/nf_reject.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ip6_checksum.h> +#include <linux/netfilter_bridge.h> +#include "../br_private.h" + +static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, + struct sk_buff *nskb) +{ + struct ethhdr *eth; + + eth = (struct ethhdr *)skb_push(nskb, ETH_HLEN); + skb_reset_mac_header(nskb); + ether_addr_copy(eth->h_source, eth_hdr(oldskb)->h_dest); + ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); + eth->h_proto = eth_hdr(oldskb)->h_proto; + skb_pull(nskb, ETH_HLEN); +} + +static int nft_reject_iphdr_validate(struct sk_buff *oldskb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(oldskb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(oldskb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (oldskb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(oldskb, iph->ihl*4)) + return 0; + + return 1; +} + +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + sysctl_ip_default_ttl); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->ttl = sysctl_ip_default_ttl; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + void *payload; + __wsum csum; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + /* RFC says return as much as we can without exceeding 576 bytes. */ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + sysctl_ip_default_ttl); + + skb_reset_transport_header(nskb); + icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); + memset(icmph, 0, sizeof(*icmph)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static int nft_reject_ip6hdr_validate(struct sk_buff *oldskb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(oldskb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(oldskb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > oldskb->len) + return 0; + + return 1; +} + +static void nft_reject_br_send_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v6_unreach(struct net *net, + struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + void *payload; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. + */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr)); + memset(icmp6h, 0, sizeof(*icmp6h)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} static void nft_reject_bridge_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], @@ -23,35 +256,46 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; + + if (is_broadcast_ether_addr(dest) || + is_multicast_ether_addr(dest)) + goto out; switch (eth_hdr(pkt->skb)->h_proto) { case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v4_tcp_reset(pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach(pkt->skb, - nft_reject_icmp_code(priv->icmp_code)); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + nft_reject_icmp_code(priv->icmp_code)); break; } break; case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(net, pkt->skb, - nft_reject_icmpv6_code(priv->icmp_code), - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + nft_reject_icmpv6_code(priv->icmp_code)); break; } break; @@ -59,15 +303,38 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, /* No explicit way to reject this protocol, drop it. */ break; } +out: data[NFT_REG_VERDICT].verdict = NF_DROP; } +static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + switch (basechain->ops[0].hooknum) { + case NF_BR_PRE_ROUTING: + case NF_BR_LOCAL_IN: + break; + default: + return -EOPNOTSUPP; + } + } + return 0; +} + static int nft_reject_bridge_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code; + int icmp_code, err; + + err = nft_reject_bridge_validate_hooks(ctx->chain); + if (err < 0) + return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; @@ -116,6 +383,13 @@ nla_put_failure: return -1; } +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_reject_bridge_validate_hooks(ctx->chain); +} + static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, @@ -123,6 +397,7 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .eval = nft_reject_bridge_eval, .init = nft_reject_bridge_init, .dump = nft_reject_bridge_dump, + .validate = nft_reject_bridge_validate, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index de6662b..7e38b72 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -149,6 +149,7 @@ static int process_one_ticket(struct ceph_auth_client *ac, struct ceph_crypto_key old_key; void *ticket_buf = NULL; void *tp, *tpend; + void **ptp; struct ceph_timespec new_validity; struct ceph_crypto_key new_session_key; struct ceph_buffer *new_ticket_blob; @@ -208,25 +209,19 @@ static int process_one_ticket(struct ceph_auth_client *ac, goto out; } tp = ticket_buf; - dlen = ceph_decode_32(&tp); + ptp = &tp; + tpend = *ptp + dlen; } else { /* unencrypted */ - ceph_decode_32_safe(p, end, dlen, bad); - ticket_buf = kmalloc(dlen, GFP_NOFS); - if (!ticket_buf) { - ret = -ENOMEM; - goto out; - } - tp = ticket_buf; - ceph_decode_need(p, end, dlen, bad); - ceph_decode_copy(p, ticket_buf, dlen); + ptp = p; + tpend = end; } - tpend = tp + dlen; + ceph_decode_32_safe(ptp, tpend, dlen, bad); dout(" ticket blob is %d bytes\n", dlen); - ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); - blob_struct_v = ceph_decode_8(&tp); - new_secret_id = ceph_decode_64(&tp); - ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); + ceph_decode_need(ptp, tpend, 1 + sizeof(u64), bad); + blob_struct_v = ceph_decode_8(ptp); + new_secret_id = ceph_decode_64(ptp); + ret = ceph_decode_buffer(&new_ticket_blob, ptp, tpend); if (ret) goto out; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 62fc5e7..790fe89 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -90,11 +90,82 @@ static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) static const u8 *aes_iv = (u8 *)CEPH_AES_IV; +/* + * Should be used for buffers allocated with ceph_kvmalloc(). + * Currently these are encrypt out-buffer (ceph_buffer) and decrypt + * in-buffer (msg front). + * + * Dispose of @sgt with teardown_sgtable(). + * + * @prealloc_sg is to avoid memory allocation inside sg_alloc_table() + * in cases where a single sg is sufficient. No attempt to reduce the + * number of sgs by squeezing physically contiguous pages together is + * made though, for simplicity. + */ +static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, + const void *buf, unsigned int buf_len) +{ + struct scatterlist *sg; + const bool is_vmalloc = is_vmalloc_addr(buf); + unsigned int off = offset_in_page(buf); + unsigned int chunk_cnt = 1; + unsigned int chunk_len = PAGE_ALIGN(off + buf_len); + int i; + int ret; + + if (buf_len == 0) { + memset(sgt, 0, sizeof(*sgt)); + return -EINVAL; + } + + if (is_vmalloc) { + chunk_cnt = chunk_len >> PAGE_SHIFT; + chunk_len = PAGE_SIZE; + } + + if (chunk_cnt > 1) { + ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS); + if (ret) + return ret; + } else { + WARN_ON(chunk_cnt != 1); + sg_init_table(prealloc_sg, 1); + sgt->sgl = prealloc_sg; + sgt->nents = sgt->orig_nents = 1; + } + + for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) { + struct page *page; + unsigned int len = min(chunk_len - off, buf_len); + + if (is_vmalloc) + page = vmalloc_to_page(buf); + else + page = virt_to_page(buf); + + sg_set_page(sg, page, len, off); + + off = 0; + buf += len; + buf_len -= len; + } + WARN_ON(buf_len != 0); + + return 0; +} + +static void teardown_sgtable(struct sg_table *sgt) +{ + if (sgt->orig_nents > 1) + sg_free_table(sgt); +} + static int ceph_aes_encrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[2], sg_out[1]; + struct scatterlist sg_in[2], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -110,16 +181,18 @@ static int ceph_aes_encrypt(const void *key, int key_len, *dst_len = src_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 2); sg_set_buf(&sg_in[0], src, src_len); sg_set_buf(&sg_in[1], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -128,16 +201,22 @@ static int ceph_aes_encrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, @@ -145,7 +224,8 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, const void *src1, size_t src1_len, const void *src2, size_t src2_len) { - struct scatterlist sg_in[3], sg_out[1]; + struct scatterlist sg_in[3], prealloc_sg; + struct sg_table sg_out; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; int ret; @@ -161,17 +241,19 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, *dst_len = src1_len + src2_len + zero_padding; - crypto_blkcipher_setkey((void *)tfm, key, key_len); sg_init_table(sg_in, 3); sg_set_buf(&sg_in[0], src1, src1_len); sg_set_buf(&sg_in[1], src2, src2_len); sg_set_buf(&sg_in[2], pad, zero_padding); - sg_init_table(sg_out, 1); - sg_set_buf(sg_out, dst, *dst_len); + ret = setup_sgtable(&sg_out, &prealloc_sg, dst, *dst_len); + if (ret) + goto out_tfm; + + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); + /* print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, key, key_len, 1); @@ -182,23 +264,30 @@ static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, pad, zero_padding, 1); */ - ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, + ret = crypto_blkcipher_encrypt(&desc, sg_out.sgl, sg_in, src1_len + src2_len + zero_padding); - crypto_free_blkcipher(tfm); - if (ret < 0) + if (ret < 0) { pr_err("ceph_aes_crypt2 failed %d\n", ret); + goto out_sg; + } /* print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_out); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt(const void *key, int key_len, void *dst, size_t *dst_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[2]; + struct sg_table sg_in; + struct scatterlist sg_out[2], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -210,16 +299,16 @@ static int ceph_aes_decrypt(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - crypto_blkcipher_setkey((void *)tfm, key, key_len); - sg_init_table(sg_in, 1); sg_init_table(sg_out, 2); - sg_set_buf(sg_in, src, src_len); sg_set_buf(&sg_out[0], dst, *dst_len); sg_set_buf(&sg_out[1], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; + crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -228,12 +317,10 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst_len) @@ -251,7 +338,12 @@ static int ceph_aes_decrypt(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, dst, *dst_len, 1); */ - return 0; + +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } static int ceph_aes_decrypt2(const void *key, int key_len, @@ -259,7 +351,8 @@ static int ceph_aes_decrypt2(const void *key, int key_len, void *dst2, size_t *dst2_len, const void *src, size_t src_len) { - struct scatterlist sg_in[1], sg_out[3]; + struct sg_table sg_in; + struct scatterlist sg_out[3], prealloc_sg; struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); struct blkcipher_desc desc = { .tfm = tfm }; char pad[16]; @@ -271,17 +364,17 @@ static int ceph_aes_decrypt2(const void *key, int key_len, if (IS_ERR(tfm)) return PTR_ERR(tfm); - sg_init_table(sg_in, 1); - sg_set_buf(sg_in, src, src_len); sg_init_table(sg_out, 3); sg_set_buf(&sg_out[0], dst1, *dst1_len); sg_set_buf(&sg_out[1], dst2, *dst2_len); sg_set_buf(&sg_out[2], pad, sizeof(pad)); + ret = setup_sgtable(&sg_in, &prealloc_sg, src, src_len); + if (ret) + goto out_tfm; crypto_blkcipher_setkey((void *)tfm, key, key_len); iv = crypto_blkcipher_crt(tfm)->iv; ivsize = crypto_blkcipher_ivsize(tfm); - memcpy(iv, aes_iv, ivsize); /* @@ -290,12 +383,10 @@ static int ceph_aes_decrypt2(const void *key, int key_len, print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, src, src_len, 1); */ - - ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); - crypto_free_blkcipher(tfm); + ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in.sgl, src_len); if (ret < 0) { pr_err("ceph_aes_decrypt failed %d\n", ret); - return ret; + goto out_sg; } if (src_len <= *dst1_len) @@ -325,7 +416,11 @@ static int ceph_aes_decrypt2(const void *key, int key_len, dst2, *dst2_len, 1); */ - return 0; +out_sg: + teardown_sgtable(&sg_in); +out_tfm: + crypto_free_blkcipher(tfm); + return ret; } diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 559c9f6..8d1653c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) IPPROTO_TCP, &sock); if (ret) return ret; - sock->sk->sk_allocation = GFP_NOFS; + sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC; #ifdef CONFIG_LOCKDEP lockdep_set_class(&sock->sk->sk_lock, &socket_class); @@ -509,6 +509,9 @@ static int ceph_tcp_connect(struct ceph_connection *con) return ret; } + + sk_set_memalloc(sock->sk); + con->sock = sock; return 0; } @@ -2769,8 +2772,11 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); + unsigned long pflags = current->flags; bool fault; + current->flags |= PF_MEMALLOC; + mutex_lock(&con->mutex); while (true) { int ret; @@ -2824,6 +2830,8 @@ static void con_work(struct work_struct *work) con_fault_finish(con); con->ops->put(con); + + tsk_restore_flags(current, pflags, PF_MEMALLOC); } /* diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f3fc54e..6f16428 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1007,8 +1007,8 @@ static void put_osd(struct ceph_osd *osd) static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { dout("__remove_osd %p\n", osd); - BUG_ON(!list_empty(&osd->o_requests)); - BUG_ON(!list_empty(&osd->o_linger_requests)); + WARN_ON(!list_empty(&osd->o_requests)); + WARN_ON(!list_empty(&osd->o_linger_requests)); rb_erase(&osd->o_node, &osdc->osds); list_del_init(&osd->o_osd_lru); @@ -1254,6 +1254,8 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } + + list_del_init(&req->r_req_lru_item); /* can be on notarget */ ceph_osdc_put_request(req); } @@ -1395,6 +1397,7 @@ static int __map_request(struct ceph_osd_client *osdc, if (req->r_osd) { __cancel_request(req); list_del_init(&req->r_osd_item); + list_del_init(&req->r_linger_osd_item); req->r_osd = NULL; } diff --git a/net/core/dev.c b/net/core/dev.c index b793e35..945bbd0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4157,6 +4157,10 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + if (unlikely(skb->pfmemalloc)) { + consume_skb(skb); + return; + } __skb_pull(skb, skb_headlen(skb)); /* restore the reserve we had after netdev_alloc_skb_ip_align() */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1600aa2..06dfb29 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1036,7 +1036,8 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { const struct ethtool_ops *ops = dev->ethtool_ops; - if (!ops->get_eeprom || !ops->get_eeprom_len) + if (!ops->get_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom, @@ -1052,7 +1053,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) u8 *data; int ret = 0; - if (!ops->set_eeprom || !ops->get_eeprom_len) + if (!ops->set_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 61059a0..c16615b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4070,15 +4070,22 @@ EXPORT_SYMBOL_GPL(skb_scrub_packet); unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) { const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int thlen = 0; - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - return tcp_hdrlen(skb) + shinfo->gso_size; + if (skb->encapsulation) { + thlen = skb_inner_transport_header(skb) - + skb_transport_header(skb); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + thlen += inner_tcp_hdrlen(skb); + } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + thlen = tcp_hdrlen(skb); + } /* UFO sets gso_size to the size of the fragmentation * payload, i.e. the size of the L4 (UDP) header is already * accounted for. */ - return shinfo->gso_size; + return thlen + shinfo->gso_size; } EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); diff --git a/net/core/tso.c b/net/core/tso.c index 8c3203c..630b30b 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,6 +1,7 @@ #include <linux/export.h> #include <net/ip.h> #include <net/tso.h> +#include <asm/unaligned.h> /* Calculate expected number of TX descriptors */ int tso_count_descs(struct sk_buff *skb) @@ -23,7 +24,7 @@ void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, iph->id = htons(tso->ip_id); iph->tot_len = htons(size + hdr_len - mac_hdr_len); tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); - tcph->seq = htonl(tso->tcp_seq); + put_unaligned_be32(tso->tcp_seq, &tcph->seq); tso->ip_id++; if (!is_last) { diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index ca11d28..93ea801 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1080,13 +1080,13 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) if (!app) return -EMSGSIZE; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); list_for_each_entry(itr, &dcb_app_list, list) { if (itr->ifindex == netdev->ifindex) { err = nla_put(skb, DCB_ATTR_IEEE_APP, sizeof(itr->app), &itr->app); if (err) { - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return -EMSGSIZE; } } @@ -1097,7 +1097,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) else dcbx = -EOPNOTSUPP; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); nla_nest_end(skb, app); /* get peer info if available */ @@ -1234,7 +1234,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) } /* local app */ - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); app = nla_nest_start(skb, DCB_ATTR_CEE_APP_TABLE); if (!app) goto dcb_unlock; @@ -1271,7 +1271,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) else dcbx = -EOPNOTSUPP; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); /* features flags */ if (ops->getfeatcfg) { @@ -1326,7 +1326,7 @@ static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) return 0; dcb_unlock: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); nla_put_failure: return err; } @@ -1762,10 +1762,10 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) struct dcb_app_type *itr; u8 prio = 0; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) prio = itr->app.priority; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return prio; } @@ -1789,7 +1789,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and replace */ if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) { if (new->priority) @@ -1804,7 +1804,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) if (new->priority) err = dcb_app_add(new, dev->ifindex); out: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1823,10 +1823,10 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) struct dcb_app_type *itr; u8 prio = 0; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) prio |= 1 << itr->app.priority; - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); return prio; } @@ -1850,7 +1850,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and abort if found */ if (dcb_app_lookup(new, dev->ifindex, new->priority)) { err = -EEXIST; @@ -1859,7 +1859,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) err = dcb_app_add(new, dev->ifindex); out: - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1882,7 +1882,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) if (dev->dcbnl_ops->getdcbx) event.dcbx = dev->dcbnl_ops->getdcbx(dev); - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); /* Search for existing match and remove it. */ if ((itr = dcb_app_lookup(del, dev->ifindex, del->priority))) { list_del(&itr->list); @@ -1890,7 +1890,7 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) err = 0; } - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); return err; @@ -1902,12 +1902,12 @@ static void dcb_flushapp(void) struct dcb_app_type *app; struct dcb_app_type *tmp; - spin_lock(&dcb_lock); + spin_lock_bh(&dcb_lock); list_for_each_entry_safe(app, tmp, &dcb_app_list, list) { list_del(&app->list); kfree(app); } - spin_unlock(&dcb_lock); + spin_unlock_bh(&dcb_lock); } static int __init dcbnl_init(void) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 22f34cf..6317b41 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -174,8 +174,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, dst->rcv = brcm_netdev_ops.rcv; break; #endif - default: + case DSA_TAG_PROTO_NONE: break; + default: + ret = -ENOPROTOOPT; + goto out; } dst->tag_protocol = drv->tag_protocol; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6d18174..ab03e00 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -489,11 +489,14 @@ static void dsa_slave_phy_setup(struct dsa_slave_priv *p, /* We could not connect to a designated PHY, so use the switch internal * MDIO bus instead */ - if (!p->phy) + if (!p->phy) { p->phy = ds->slave_mii_bus->phy_map[p->port]; - else + phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); + } else { pr_info("attached PHY at address %d [%s]\n", p->phy->addr, p->phy->drv->name); + } } int dsa_slave_suspend(struct net_device *slave_dev) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 92db7a6..8b7fe5b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,7 +1246,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += ihl; skb_reset_transport_header(skb); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index f2e1573..8f7bd56 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -62,6 +62,10 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) else res->tclassid = 0; #endif + + if (err == -ESRCH) + err = -ENETUNREACH; + return err; } EXPORT_SYMBOL_GPL(__fib_lookup); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 32e7892..606c520 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -133,6 +133,8 @@ static int fou_gro_complete(struct sk_buff *skb, int nhoff) int err = -ENOSYS; const struct net_offload **offloads; + udp_tunnel_gro_complete(skb, nhoff); + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 065cd94..dedb21e 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -144,6 +144,8 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet); } @@ -364,6 +366,7 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { destroy_workqueue(geneve_wq); + unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ccda096..bb5947b 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -47,7 +47,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); - ghl = skb_inner_network_header(skb) - skb_transport_header(skb); + ghl = skb_inner_mac_header(skb) - skb_transport_header(skb); if (unlikely(ghl < sizeof(*greh))) goto out; @@ -68,7 +68,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb->mac_len = skb_inner_network_offset(skb); /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = skb_mac_gso_segment(skb, enc_features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, ghl, mac_offset, mac_len); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index fb70e3e..bb15d0e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -318,9 +318,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) return scount; } -#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb)) - -static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) +static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu) { struct sk_buff *skb; struct rtable *rt; @@ -330,6 +328,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct flowi4 fl4; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + unsigned int size = mtu; while (1) { skb = alloc_skb(size + hlen + tlen, @@ -341,7 +340,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) return NULL; } skb->priority = TC_PRIO_CONTROL; - igmp_skb_size(skb) = size; rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0, 0, 0, @@ -354,6 +352,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) skb_dst_set(skb, &rt->dst); skb->dev = dev; + skb->reserved_tailroom = skb_end_offset(skb) - + min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); skb_reset_network_header(skb); @@ -423,8 +423,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \ - skb_tailroom(skb)) : 0) +#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 9eb89f3..19419b6 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -146,7 +146,6 @@ evict_again: atomic_inc(&fq->refcnt); spin_unlock(&hb->chain_lock); del_timer_sync(&fq->timer); - WARN_ON(atomic_read(&fq->refcnt) != 1); inet_frag_put(fq, f); goto evict_again; } @@ -285,7 +284,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - hlist_del(&fq->list); + if (!(fq->flags & INET_FRAG_EVICTED)) + hlist_del(&fq->list); spin_unlock(&hb->chain_lock); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 88e5ef2..bc6471d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -231,7 +231,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) */ features = netif_skb_features(skb); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR(segs)) { + if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); return -ENOMEM; } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c373a9a..9daf2177 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -195,7 +195,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; -#if defined(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (allow_ipv6 && cmsg->cmsg_level == SOL_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) { diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index b023b4e..1baaa83 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -6,48 +6,45 @@ * published by the Free Software Foundation. */ +#include <linux/module.h> #include <net/ip.h> #include <net/tcp.h> #include <net/route.h> #include <net/dst.h> #include <linux/netfilter_ipv4.h> +#include <net/netfilter/ipv4/nf_reject.h> -/* Send RST reply */ -void nf_send_reset(struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) { - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; + return NULL; oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); + sizeof(struct tcphdr), _oth); if (oth == NULL) - return; + return NULL; /* No RST for RST. */ if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; + return NULL; /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); + return NULL; - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; + return oth; +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); - skb_reserve(nskb, LL_MAX_HEADER); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl) +{ + struct iphdr *niph, *oiph = ip_hdr(oldskb); skb_reset_network_header(nskb); niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); @@ -56,10 +53,23 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) niph->tos = 0; niph->id = 0; niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; + niph->protocol = protocol; niph->check = 0; niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + niph->ttl = ttl; + + nskb->protocol = htons(ETH_P_IP); + + return niph; +} +EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); + +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) +{ + struct iphdr *niph = ip_hdr(nskb); + struct tcphdr *tcph; skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); @@ -68,9 +78,9 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) tcph->dest = oth->source; tcph->doff = sizeof(struct tcphdr) / 4; - if (oth->ack) + if (oth->ack) { tcph->seq = oth->ack_seq; - else { + } else { tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - ip_hdrlen(oldskb) - (oth->doff << 2)); @@ -83,16 +93,43 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); + +/* Send RST reply */ +void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); - nskb->protocol = htons(ETH_P_IP); + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; @@ -125,3 +162,5 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) kfree_skb(nskb); } EXPORT_SYMBOL_GPL(nf_send_reset); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 1c636d6..665de06 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -24,6 +24,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, @@ -39,6 +40,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = { .eval = nft_masq_ipv4_eval, .init = nft_masq_init, .dump = nft_masq_dump, + .validate = nft_masq_validate, }; static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2d4ae46..6a2155b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1798,6 +1798,7 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); res.type = RTN_UNREACHABLE; + res.fi = NULL; goto local_input; /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1bec4e7..39ec0c3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2868,61 +2868,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt); #endif #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); - -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); - - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - } - free_percpu(pool); -} +static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { int cpu; - struct tcp_md5sig_pool __percpu *pool; - - pool = alloc_percpu(struct tcp_md5sig_pool); - if (!pool) - return; for_each_possible_cpu(cpu) { - struct crypto_hash *hash; - - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) - goto out_free; + if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { + struct crypto_hash *hash; - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR_OR_NULL(hash)) + return; + per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; + } } - /* before setting tcp_md5sig_pool, we must commit all writes - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + /* before setting tcp_md5sig_pool_populated, we must commit all writes + * to memory. See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool = pool; - return; -out_free: - __tcp_free_md5sig_pool(pool); + tcp_md5sig_pool_populated = true; } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool)) { + if (unlikely(!tcp_md5sig_pool_populated)) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool) + if (!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool != NULL; + return tcp_md5sig_pool_populated; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2936,13 +2917,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *p; - local_bh_disable(); - p = ACCESS_ONCE(tcp_md5sig_pool); - if (p) - return raw_cpu_ptr(p); + if (tcp_md5sig_pool_populated) { + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ + smp_rmb(); + return this_cpu_ptr(&tcp_md5sig_pool); + } local_bh_enable(); return NULL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a12b455..88fa2d1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2315,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) /* Undo procedures. */ +/* We can clear retrans_stamp when there are no retransmissions in the + * window. It would seem that it is trivially available for us in + * tp->retrans_out, however, that kind of assumptions doesn't consider + * what will happen if errors occur when sending retransmission for the + * second time. ...It could the that such segment has only + * TCPCB_EVER_RETRANS set at the present time. It seems that checking + * the head skb is enough except for some reneging corner cases that + * are not worth the effort. + * + * Main reason for all this complexity is the fact that connection dying + * time now depends on the validity of the retrans_stamp, in particular, + * that successive retransmissions of a segment must not advance + * retrans_stamp under any conditions. + */ +static bool tcp_any_retrans_done(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (tp->retrans_out) + return true; + + skb = tcp_write_queue_head(sk); + if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) + return true; + + return false; +} + #if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, const char *msg) { @@ -2410,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ tcp_moderate_cwnd(tp); + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; return true; } tcp_set_ca_state(sk, TCP_CA_Open); @@ -2430,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk) return false; } -/* We can clear retrans_stamp when there are no retransmissions in the - * window. It would seem that it is trivially available for us in - * tp->retrans_out, however, that kind of assumptions doesn't consider - * what will happen if errors occur when sending retransmission for the - * second time. ...It could the that such segment has only - * TCPCB_EVER_RETRANS set at the present time. It seems that checking - * the head skb is enough except for some reneging corner cases that - * are not worth the effort. - * - * Main reason for all this complexity is the fact that connection dying - * time now depends on the validity of the retrans_stamp, in particular, - * that successive retransmissions of a segment must not advance - * retrans_stamp under any conditions. - */ -static bool tcp_any_retrans_done(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if (tp->retrans_out) - return true; - - skb = tcp_write_queue_head(sk); - if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) - return true; - - return false; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 94d1a77..9c7d762 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -206,8 +206,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_dport = usin->sin_port; inet->inet_daddr = daddr; - inet_set_txhash(sk); - inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; @@ -224,6 +222,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err) goto failure; + inet_set_txhash(sk); + rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, inet->inet_sport, inet->inet_dport, sk); if (IS_ERR(rt)) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3af2129..a3d453b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2126,7 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); return true; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 507310e..6480cea 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -58,7 +58,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, skb->encap_hdr_csum = 1; /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + enc_features = skb->dev->hw_enc_features & features; segs = gso_inner_segment(skb, enc_features); if (IS_ERR_OR_NULL(segs)) { skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 725c763..0169ccf 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4531,6 +4531,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); + inet6_ifinfo_notify(RTM_NEWLINK, idev); addrconf_verify_rtnl(); return 0; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 12c3c8e..4564e1f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -961,8 +961,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) else dev->flags &= ~IFF_POINTOPOINT; - dev->iflink = p->link; - /* Precalculate GRE options length */ if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { if (t->parms.o_flags&GRE_CSUM) @@ -1272,6 +1270,7 @@ static int ip6gre_tunnel_init(struct net_device *dev) u64_stats_init(&ip6gre_tunnel_stats->syncp); } + dev->iflink = tunnel->parms.link; return 0; } @@ -1481,6 +1480,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; + dev->iflink = tunnel->parms.link; + return 0; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 91014d3..a071563 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -90,7 +90,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, encap = SKB_GSO_CB(skb)->encap_level > 0; if (encap) - features = skb->dev->hw_enc_features & netif_skb_features(skb); + features &= skb->dev->hw_enc_features; SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); ipv6h = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9409887..9cb94cf 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -272,9 +272,6 @@ static int ip6_tnl_create2(struct net_device *dev) int err; t = netdev_priv(dev); - err = ip6_tnl_dev_init(dev); - if (err < 0) - goto out; err = register_netdevice(dev); if (err < 0) @@ -1462,6 +1459,7 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ip6_tnl_netdev_ops = { + .ndo_init = ip6_tnl_dev_init, .ndo_uninit = ip6_tnl_dev_uninit, .ndo_start_xmit = ip6_tnl_xmit, .ndo_do_ioctl = ip6_tnl_ioctl, @@ -1546,16 +1544,10 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); - int err = ip6_tnl_dev_init_gen(dev); - - if (err) - return err; t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - ip6_tnl_link_config(t); - rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d440bb5..31089d1 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -172,10 +172,6 @@ static int vti6_tnl_create2(struct net_device *dev) struct vti6_net *ip6n = net_generic(net, vti6_net_id); int err; - err = vti6_dev_init(dev); - if (err < 0) - goto out; - err = register_netdevice(dev); if (err < 0) goto out; @@ -783,6 +779,7 @@ static int vti6_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops vti6_netdev_ops = { + .ndo_init = vti6_dev_init, .ndo_uninit = vti6_dev_uninit, .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, @@ -852,16 +849,10 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) struct ip6_tnl *t = netdev_priv(dev); struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); - int err = vti6_dev_init_gen(dev); - - if (err) - return err; t->parms.proto = IPPROTO_IPV6; dev_hold(dev); - vti6_link_config(t); - rcu_assign_pointer(ip6n->tnls_wc[0], t); return 0; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0171f08..1a01d79 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1439,6 +1439,10 @@ reg_pernet_fail: void ip6_mr_cleanup(void) { + rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE); +#ifdef CONFIG_IPV6_PIMSM_V2 + inet6_del_protocol(&pim6_protocol, IPPROTO_PIM); +#endif unregister_netdevice_notifier(&ip6_mr_notifier); unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9648de2..ed2c4e4 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1550,7 +1550,7 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) { struct net_device *dev = idev->dev; struct net *net = dev_net(dev); @@ -1561,13 +1561,13 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) const struct in6_addr *saddr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + unsigned int size = mtu + hlen + tlen; int err; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - size += hlen + tlen; /* limit our allocations to order-0 page */ size = min_t(int, size, SKB_MAX_ORDER(0, 0)); skb = sock_alloc_send_skb(sk, size, 1, &err); @@ -1576,6 +1576,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) return NULL; skb->priority = TC_PRIO_CONTROL; + skb->reserved_tailroom = skb_end_offset(skb) - + min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { @@ -1690,8 +1692,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ - skb_tailroom(skb)) : 0) +#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 5f5f043..015eb8a 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -5,121 +5,109 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#include <linux/module.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/ip6_fib.h> #include <net/ip6_checksum.h> #include <linux/netfilter_ipv6.h> +#include <net/netfilter/ipv6/nf_reject.h> -void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook) { - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; u8 proto; __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } + int tcphoff; proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { pr_debug("Cannot get TCP header.\n"); - return; + return NULL; } - otcplen = oldskb->len - tcphoff; + *otcplen = oldskb->len - tcphoff; /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; + if (proto != IPPROTO_TCP || *otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP or too short (len = %d)\n", + proto, *otcplen); + return NULL; } - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); + otcph = skb_header_pointer(oldskb, tcphoff, sizeof(struct tcphdr), + otcph); + if (otcph == NULL) + return NULL; /* No RST for RST. */ - if (otcph.rst) { + if (otcph->rst) { pr_debug("RST is set\n"); - return; + return NULL; } /* Check checksum. */ if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); - return; + return NULL; } - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; - } - - skb_dst_set(nskb, dst); + return otcph; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); - skb_reserve(nskb, hh_len + dst->header_len); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit) +{ + struct ipv6hdr *ip6h; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = hoplimit; + ip6h->nexthdr = protocol; ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + nskb->protocol = htons(ETH_P_IPV6); + + return ip6h; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6hdr_put); + +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen) +{ + struct tcphdr *tcph; + int needs_ack; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; + tcph->source = oth->dest; + tcph->dest = oth->source; - if (otcph.ack) { + if (oth->ack) { needs_ack = 0; - tcph->seq = otcph.ack_seq; + tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + otcplen - (oth->doff<<2)); tcph->seq = 0; } @@ -137,6 +125,63 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) sizeof(struct tcphdr), IPPROTO_TCP, csum_partial(tcph, sizeof(struct tcphdr), 0)); +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); + +void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr _otcph; + const struct tcphdr *otcph; + unsigned int otcplen, hh_len; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; + struct dst_entry *dst = NULL; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + otcph = nf_reject_ip6_tcphdr_get(oldskb, &_otcph, &otcplen, hook); + if (!otcph) + return; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph->dest; + fl6.fl6_dport = otcph->source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + ip6_dst_hoplimit(dst)); + nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); @@ -161,3 +206,5 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) ip6_local_out(nskb); } EXPORT_SYMBOL_GPL(nf_send_reset6); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 556262f..529c119 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -25,6 +25,7 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr, struct nf_nat_range range; unsigned int verdict; + memset(&range, 0, sizeof(range)); range.flags = priv->flags; verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); @@ -39,6 +40,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = { .eval = nft_masq_ipv6_eval, .init = nft_masq_init, .dump = nft_masq_dump, + .validate = nft_masq_validate, }; static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index fc24c39..97f41a3 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -3,11 +3,45 @@ * not configured or static. These functions are needed by GSO/GRO implementation. */ #include <linux/export.h> +#include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> #include <net/secure_seq.h> +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. + */ +void ipv6_proxy_select_ident(struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 hash, id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return; + + net_get_random_once(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); + hash = __ipv6_addr_jhash(&addrs[0], hash); + + id = ip_idents_reserve(hash, 1); + skb_shinfo(skb)->ip6_frag_id = htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 58e5b47..a24557a1 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -195,10 +195,8 @@ static int ipip6_tunnel_create(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); int err; - err = ipip6_tunnel_init(dev); - if (err < 0) - goto out; - ipip6_tunnel_clone_6rd(dev, sitn); + memcpy(dev->dev_addr, &t->parms.iph.saddr, 4); + memcpy(dev->broadcast, &t->parms.iph.daddr, 4); if ((__force u16)t->parms.i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; @@ -207,7 +205,8 @@ static int ipip6_tunnel_create(struct net_device *dev) if (err < 0) goto out; - strcpy(t->parms.name, dev->name); + ipip6_tunnel_clone_6rd(dev, sitn); + dev->rtnl_link_ops = &sit_link_ops; dev_hold(dev); @@ -1330,6 +1329,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops ipip6_netdev_ops = { + .ndo_init = ipip6_tunnel_init, .ndo_uninit = ipip6_tunnel_uninit, .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, @@ -1378,9 +1378,7 @@ static int ipip6_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + strcpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); @@ -1405,7 +1403,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) tunnel->dev = dev; tunnel->net = dev_net(dev); - strcpy(tunnel->parms.name, dev->name); iph->version = 4; iph->protocol = IPPROTO_IPV6; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8314955..ace29b6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -200,8 +200,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; - ip6_set_txhash(sk); - /* * TCP over IPv4 */ @@ -297,6 +295,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) goto late_failure; + ip6_set_txhash(sk); + if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ac49f84..5f98364 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -170,8 +170,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (!onlyproto && (nh + offset + 4 < skb->data || pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports = (__be16 *)exthdr; + __be16 *ports; + nh = skb_network_header(skb); + ports = (__be16 *)(nh + offset); fl6->fl6_sport = ports[!!reverse]; fl6->fl6_dport = ports[!reverse]; } @@ -180,8 +182,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_ICMPV6: if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) { - u8 *icmp = (u8 *)exthdr; + u8 *icmp; + nh = skb_network_header(skb); + icmp = (u8 *)(nh + offset); fl6->fl6_icmp_type = icmp[0]; fl6->fl6_icmp_code = icmp[1]; } @@ -192,8 +196,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_MH: if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; - mh = (struct ip6_mh *)exthdr; + nh = skb_network_header(skb); + mh = (struct ip6_mh *)(nh + offset); fl6->fl6_mh_type = mh->ip6mh_type; } fl6->flowi6_proto = nexthdr; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 91729b8..1b095ca 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1764,6 +1764,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, struct ipxhdr *ipx = NULL; struct sk_buff *skb; int copied, rc; + bool locked = true; lock_sock(sk); /* put the autobinding in */ @@ -1790,6 +1791,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_ZAPPED)) goto out; + release_sock(sk); + locked = false; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); if (!skb) { @@ -1826,7 +1829,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk, skb); out: - release_sock(sk); + if (locked) + release_sock(sk); return rc; } diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 92fafd4..3f3a6cb 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1064,8 +1064,6 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state != TCP_ESTABLISHED) { sock->state = SS_UNCONNECTED; - if (sk->sk_prot->disconnect(sk, flags)) - sock->state = SS_DISCONNECTING; err = sock_error(sk); if (!err) err = -ECONNRESET; diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 0a3c171..6dec088 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -1,4 +1,4 @@ # # Makefile for MPLS. # -obj-y += mpls_gso.o +obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index e28ed2e..e3545f2 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -48,7 +48,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, __skb_push(skb, skb->mac_len); /* Segment inner packet. */ - mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + mpls_features = skb->dev->mpls_features & features; segs = skb_mac_gso_segment(skb, mpls_features); @@ -59,8 +59,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, * above pulled. It will be re-pushed after returning * skb_mac_gso_segment(), an indirect caller of this function. */ - __skb_push(skb, skb->data - skb_mac_header(skb)); - + __skb_pull(skb, skb->data - skb_mac_header(skb)); out: return segs; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 912e5a0..d259da3 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -659,7 +659,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - if (index > inst->ip_set_max) + if (index >= inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); @@ -1863,6 +1863,12 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ struct ip_set_req_version *req_version = data; + + if (*len < sizeof(struct ip_set_req_version)) { + ret = -EINVAL; + goto done; + } + if (req_version->version != IPSET_PROTOCOL) { ret = -EPROTO; goto done; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 91f17c1..bd90bf8 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -316,7 +316,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI4\n", &dest->addr.ip); + " daddr=%pI4\n", &daddr); goto err_put; } @@ -458,7 +458,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI6\n", &dest->addr.in6); + " daddr=%pI6\n", daddr); goto err_put; } @@ -846,6 +846,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) goto error; + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); consume_skb(skb); skb = new_skb; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5016a69..2c69975 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -611,12 +611,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); - /* We have to check the DYING flag inside the lock to prevent - a race against nf_ct_get_next_corpse() possibly called from - user context, else we insert an already 'dead' hash, blocking - further use of that particular connection -JM */ + + /* We have to check the DYING flag after unlink to prevent + * a race against nf_ct_get_next_corpse() possibly called from + * user context, else we insert an already 'dead' hash, blocking + * further use of that particular connection -JM. + */ + nf_ct_del_from_dying_or_unconfirmed_list(ct); if (unlikely(nf_ct_is_dying(ct))) { + nf_ct_add_to_dying_list(ct); nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return NF_ACCEPT; @@ -636,8 +640,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; - nf_ct_del_from_dying_or_unconfirmed_list(ct); - /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 44d1ea3..d87b642 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -213,7 +213,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* REPLY */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, /* * sNO -> sIV Never reached. * sSS -> sS2 Simultaneous open @@ -223,7 +223,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sFW -> sIV * sCW -> sIV * sLA -> sIV - * sTW -> sIV Reopened connection, but server may not do it. + * sTW -> sSS Reopened connection, but server may have switched role * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 556a0df..66e8425 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1328,10 +1328,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); - if (IS_ERR(stats)) { + if (stats == NULL) { module_put(type->owner); kfree(basechain); - return PTR_ERR(stats); + return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } @@ -3484,13 +3484,8 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -/* Schedule objects for release via rcu to make sure no packets are accesing - * removed rules. - */ -static void nf_tables_commit_release_rcu(struct rcu_head *rt) +static void nf_tables_commit_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_DELTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3612,10 +3607,11 @@ static int nf_tables_commit(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu); + nf_tables_commit_release(trans); } nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); @@ -3623,13 +3619,8 @@ static int nf_tables_commit(struct sk_buff *skb) return 0; } -/* Schedule objects for release via rcu to make sure no packets are accesing - * aborted rules. - */ -static void nf_tables_abort_release_rcu(struct rcu_head *rt) +static void nf_tables_abort_release(struct nft_trans *trans) { - struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head); - switch (trans->msg_type) { case NFT_MSG_NEWTABLE: nf_tables_table_destroy(&trans->ctx); @@ -3725,11 +3716,12 @@ static int nf_tables_abort(struct sk_buff *skb) } } + synchronize_rcu(); + list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); - trans->ctx.nla = NULL; - call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu); + nf_tables_abort_release(trans); } return 0; @@ -3744,6 +3736,20 @@ static const struct nfnetlink_subsystem nf_tables_subsys = { .abort = nf_tables_abort, }; +int nft_chain_validate_dependency(const struct nft_chain *chain, + enum nft_chain_type type) +{ + const struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + if (basechain->type->type != type) + return -EOPNOTSUPP; + } + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); + /* * Loop detection - walk through the ruleset beginning at the destination chain * of a new jump until either the source chain is reached (loop) or all diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1e3a05..5f1be5b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -43,7 +43,8 @@ #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ -#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ +/* max packet size is limited by 16-bit struct nfattr nfa_len field */ +#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) #define PRINTR(x, args...) do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -252,6 +253,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; + if (range == 0) + range = NFULNL_COPY_RANGE_MAX; inst->copy_range = min_t(unsigned int, range, NFULNL_COPY_RANGE_MAX); break; @@ -343,26 +346,25 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, return skb; } -static int +static void __nfulnl_send(struct nfulnl_instance *inst) { - int status = -1; - if (inst->qlen > 1) { struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, NLMSG_DONE, sizeof(struct nfgenmsg), 0); - if (!nlh) + if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", + inst->skb->len, skb_tailroom(inst->skb))) { + kfree_skb(inst->skb); goto out; + } } - status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); - + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, + MSG_DONTWAIT); +out: inst->qlen = 0; inst->skb = NULL; -out: - return status; } static void @@ -649,7 +651,8 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(u_int32_t)) /* gid */ + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) + + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) @@ -678,8 +681,7 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range == 0 - || inst->copy_range > skb->len) + if (inst->copy_range > skb->len) data_len = skb->len; else data_len = inst->copy_range; @@ -692,8 +694,7 @@ nfulnl_log_packet(struct net *net, goto unlock_and_release; } - if (inst->skb && - size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { + if (inst->skb && size > skb_tailroom(inst->skb)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ __nfulnl_flush(inst); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index a82077d..7c60ccd 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -665,7 +665,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) * returned by nf_queue. For instance, callers rely on -ECANCELED to * mean 'ignore this hook'. */ - if (IS_ERR(segs)) + if (IS_ERR_OR_NULL(segs)) goto out_err; queued = 0; err = 0; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7e2683c..265e190 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -19,9 +19,24 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> -#include <asm/uaccess.h> /* for set_fs */ #include <net/netfilter/nf_tables.h> +static int nft_compat_chain_validate_dependency(const char *tablename, + const struct nft_chain *chain) +{ + const struct nft_base_chain *basechain; + + if (!tablename || !(chain->flags & NFT_BASE_CHAIN)) + return 0; + + basechain = nft_base_chain(chain); + if (strcmp(tablename, "nat") == 0 && + basechain->type->type != NFT_CHAIN_T_NAT) + return -EINVAL; + + return 0; +} + union nft_entry { struct ipt_entry e4; struct ip6t_entry e6; @@ -74,7 +89,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, struct xt_target *target, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: @@ -95,6 +110,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } @@ -151,6 +168,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(target->table, ctx->chain); + if (ret < 0) + goto err; + target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -216,6 +237,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, { struct xt_target *target = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -223,11 +245,13 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & target->hooks) - return 0; + if (!(hook_mask & target->hooks)) + return -EINVAL; - /* This target is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(target->table, + ctx->chain); + if (ret < 0) + return ret; } return 0; } @@ -272,7 +296,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, struct xt_match *match, void *info, union nft_entry *entry, u8 proto, bool inv) { - par->net = &init_net; + par->net = ctx->net; par->table = ctx->table->name; switch (ctx->afi->family) { case AF_INET: @@ -293,6 +317,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; par->hook_mask = 1 << ops->hooknum; + } else { + par->hook_mask = 0; } par->family = ctx->afi->family; } @@ -320,6 +346,10 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, union nft_entry e = {}; int ret; + ret = nft_compat_chain_validate_dependency(match->table, ctx->chain); + if (ret < 0) + goto err; + match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info); if (ctx->nla[NFTA_RULE_COMPAT]) { @@ -379,6 +409,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, { struct xt_match *match = expr->ops->data; unsigned int hook_mask = 0; + int ret; if (ctx->chain->flags & NFT_BASE_CHAIN) { const struct nft_base_chain *basechain = @@ -386,11 +417,13 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (hook_mask & match->hooks) - return 0; + if (!(hook_mask & match->hooks)) + return -EINVAL; - /* This match is being called from an invalid chain */ - return -EINVAL; + ret = nft_compat_chain_validate_dependency(match->table, + ctx->chain); + if (ret < 0) + return ret; } return 0; } @@ -611,7 +644,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, family = ctx->afi->family; /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &nft_match_list, head) { + list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 6637bab..d1ffd5e 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -26,6 +26,11 @@ int nft_masq_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_masq *priv = nft_expr_priv(expr); + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; if (tb[NFTA_MASQ_FLAGS] == NULL) return 0; @@ -55,5 +60,12 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_masq_dump); +int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} +EXPORT_SYMBOL_GPL(nft_masq_validate); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 799550b..afe2b0b 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -95,7 +95,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, u32 family; int err; - if (tb[NFTA_NAT_TYPE] == NULL) + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + if (tb[NFTA_NAT_TYPE] == NULL || + (tb[NFTA_NAT_REG_ADDR_MIN] == NULL && + tb[NFTA_NAT_REG_PROTO_MIN] == NULL)) return -EINVAL; switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) { @@ -120,38 +126,44 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MIN])); + priv->sreg_addr_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN])); + err = nft_validate_input_register(priv->sreg_addr_min); if (err < 0) return err; - } - if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_ADDR_MAX])); - err = nft_validate_input_register(priv->sreg_addr_max); - if (err < 0) - return err; - } else - priv->sreg_addr_max = priv->sreg_addr_min; + if (tb[NFTA_NAT_REG_ADDR_MAX]) { + priv->sreg_addr_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX])); + + err = nft_validate_input_register(priv->sreg_addr_max); + if (err < 0) + return err; + } else { + priv->sreg_addr_max = priv->sreg_addr_min; + } + } if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MIN])); + priv->sreg_proto_min = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); if (err < 0) return err; - } - if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = ntohl(nla_get_be32( - tb[NFTA_NAT_REG_PROTO_MAX])); - err = nft_validate_input_register(priv->sreg_proto_max); - if (err < 0) - return err; - } else - priv->sreg_proto_max = priv->sreg_proto_min; + if (tb[NFTA_NAT_REG_PROTO_MAX]) { + priv->sreg_proto_max = + ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX])); + + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } if (tb[NFTA_NAT_FLAGS]) { priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS])); @@ -179,17 +191,19 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family))) goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min))) - goto nla_put_failure; - if (nla_put_be32(skb, - NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max))) - goto nla_put_failure; + + if (priv->sreg_addr_min) { + if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN, + htonl(priv->sreg_addr_min)) || + nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, + htonl(priv->sreg_addr_max))) + goto nla_put_failure; + } + if (priv->sreg_proto_min) { if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, - htonl(priv->sreg_proto_min))) - goto nla_put_failure; - if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, + htonl(priv->sreg_proto_min)) || + nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max))) goto nla_put_failure; } @@ -205,6 +219,13 @@ nla_put_failure: return -1; } +static int nft_nat_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} + static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, @@ -212,6 +233,7 @@ static const struct nft_expr_ops nft_nat_ops = { .eval = nft_nat_eval, .init = nft_nat_init, .dump = nft_nat_dump, + .validate = nft_nat_validate, }; static struct nft_expr_type nft_nat_type __read_mostly = { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7a186e7..0007b81 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -96,6 +96,14 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); static void netlink_skb_destructor(struct sk_buff *skb); +/* nl_table locking explained: + * Lookup and traversal are protected with nl_sk_hash_lock or nl_table_lock + * combined with an RCU read-side lock. Insertion and removal are protected + * with nl_sk_hash_lock while using RCU list modification primitives and may + * run in parallel to nl_table_lock protected lookups. Destruction of the + * Netlink socket may only occur *after* nl_table_lock has been acquired + * either during or after the socket has been removed from the list. + */ DEFINE_RWLOCK(nl_table_lock); EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); @@ -109,10 +117,10 @@ EXPORT_SYMBOL_GPL(nl_sk_hash_lock); static int lockdep_nl_sk_hash_is_held(void) { #ifdef CONFIG_LOCKDEP - return (debug_locks) ? lockdep_is_held(&nl_sk_hash_lock) : 1; -#else - return 1; + if (debug_locks) + return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); #endif + return 1; } static ATOMIC_NOTIFIER_HEAD(netlink_chain); @@ -1028,11 +1036,13 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) struct netlink_table *table = &nl_table[protocol]; struct sock *sk; + read_lock(&nl_table_lock); rcu_read_lock(); sk = __netlink_lookup(table, portid, net); if (sk) sock_hold(sk); rcu_read_unlock(); + read_unlock(&nl_table_lock); return sk; } @@ -1257,9 +1267,6 @@ static int netlink_release(struct socket *sock) } netlink_table_ungrab(); - /* Wait for readers to complete */ - synchronize_net(); - kfree(nlk->groups); nlk->groups = NULL; @@ -1281,6 +1288,7 @@ static int netlink_autobind(struct socket *sock) retry: cond_resched(); + netlink_table_grab(); rcu_read_lock(); if (__netlink_lookup(table, portid, net)) { /* Bind collision, search negative portid values. */ @@ -1288,9 +1296,11 @@ retry: if (rover > -4097) rover = -4097; rcu_read_unlock(); + netlink_table_ungrab(); goto retry; } rcu_read_unlock(); + netlink_table_ungrab(); err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) @@ -1430,7 +1440,7 @@ static void netlink_unbind(int group, long unsigned int groups, return; for (undo = 0; undo < group; undo++) - if (test_bit(group, &groups)) + if (test_bit(undo, &groups)) nlk->netlink_unbind(undo); } @@ -1482,7 +1492,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, net, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_unbind(nlk->ngroups - 1, groups, nlk); + netlink_unbind(nlk->ngroups, groups, nlk); return err; } } @@ -2499,6 +2509,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; + nl_table[unit].unbind = cfg->unbind; nl_table[unit].flags = cfg->flags; if (cfg->compare) nl_table[unit].compare = cfg->compare; @@ -2921,14 +2932,16 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) } static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(nl_table_lock) __acquires(RCU) { + read_lock(&nl_table_lock); rcu_read_lock(); return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct rhashtable *ht; struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; @@ -2943,19 +2956,19 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter = seq->private; nlk = v; - rht_for_each_entry_rcu(nlk, nlk->node.next, node) + i = iter->link; + ht = &nl_table[i].hash; + rht_for_each_entry(nlk, nlk->node.next, ht, node) if (net_eq(sock_net((struct sock *)nlk), net)) return nlk; - i = iter->link; j = iter->hash_idx + 1; do { - struct rhashtable *ht = &nl_table[i].hash; const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { if (net_eq(sock_net((struct sock *)nlk), net)) { iter->link = i; iter->hash_idx = j; @@ -2971,9 +2984,10 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void netlink_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(RCU) __releases(nl_table_lock) { rcu_read_unlock(); + read_unlock(&nl_table_lock); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 006886d..8c4229b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -246,11 +246,11 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, { int transport_len = skb->len - skb_transport_offset(skb); - if (l4_proto == IPPROTO_TCP) { + if (l4_proto == NEXTHDR_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, addr, new_addr, 1); - } else if (l4_proto == IPPROTO_UDP) { + } else if (l4_proto == NEXTHDR_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); @@ -261,6 +261,10 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, uh->check = CSUM_MANGLED_0; } } + } else if (l4_proto == NEXTHDR_ICMP) { + if (likely(transport_len >= sizeof(struct icmp6hdr))) + inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum, + skb, addr, new_addr, 1); } } @@ -722,8 +726,6 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); - if (unlikely(err)) /* skb already freed. */ - return err; break; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2e31d9e..f9e556b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -324,6 +324,8 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); + if (segs == NULL) + return -EINVAL; /* Queue all of the segments. */ skb = segs; @@ -1263,7 +1265,7 @@ static size_t ovs_dp_cmd_msg_size(void) return msgsize; } -/* Called with ovs_mutex or RCU read lock. */ +/* Called with ovs_mutex. */ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { @@ -1553,7 +1555,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!reply) return -ENOMEM; - rcu_read_lock(); + ovs_lock(); dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); if (IS_ERR(dp)) { err = PTR_ERR(dp); @@ -1562,12 +1564,12 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_NEW); BUG_ON(err < 0); - rcu_read_unlock(); + ovs_unlock(); return genlmsg_reply(reply, info); err_unlock_free: - rcu_read_unlock(); + ovs_unlock(); kfree_skb(reply); return err; } @@ -1579,8 +1581,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int skip = cb->args[0]; int i = 0; - rcu_read_lock(); - list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) { + ovs_lock(); + list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -1588,7 +1590,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; i++; } - rcu_read_unlock(); + ovs_unlock(); cb->args[0] = i; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 939bcb3..089b195 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -145,7 +145,7 @@ static bool match_validate(const struct sw_flow_match *match, if (match->key->eth.type == htons(ETH_P_ARP) || match->key->eth.type == htons(ETH_P_RARP)) { key_expected |= 1 << OVS_KEY_ATTR_ARP; - if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + if (match->mask && (match->mask->key.tp.src == htons(0xff))) mask_allowed |= 1 << OVS_KEY_ATTR_ARP; } @@ -689,6 +689,13 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } + + if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { + OVS_NLERR("IPv6 flow label %x is out of range (max=%x).\n", + ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); + return -EINVAL; + } + SW_FLOW_KEY_PUT(match, ipv6.label, ipv6_key->ipv6_label, is_mask); SW_FLOW_KEY_PUT(match, ip.proto, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2cf61b3..76f402e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -947,7 +947,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) { if (qdisc_is_percpu_stats(sch)) { sch->cpu_bstats = - alloc_percpu(struct gnet_stats_basic_cpu); + netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); if (!sch->cpu_bstats) goto err_out4; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 33d7a98..b783a44 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -445,7 +445,6 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = q->params.limit; setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); - mod_timer(&q->adapt_timer, jiffies + HZ / 2); if (opt) { int err = pie_change(sch, opt); @@ -454,6 +453,7 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) return err; } + mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; } diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 0e85291..fb7976a 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -862,8 +862,6 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, list_add(&cur_key->key_list, sh_keys); cur_key->key = key; - sctp_auth_key_hold(key); - return 0; nomem: if (!replace) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ab734be..9f32741 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2609,6 +2609,9 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); af = sctp_get_af_specific(param_type2af(param.p->type)); + if (af == NULL) + break; + af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0); diff --git a/net/tipc/node.c b/net/tipc/node.c index 90cee4a..5781634 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -219,11 +219,11 @@ void tipc_node_abort_sock_conns(struct list_head *conns) void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active = &n_ptr->active_links[0]; - u32 addr = n_ptr->addr; n_ptr->working_links++; - tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, - l_ptr->bearer_id, addr); + n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP; + n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; + pr_info("Established link <%s> on network plane %c\n", l_ptr->name, l_ptr->net_plane); @@ -284,10 +284,10 @@ static void node_select_active_links(struct tipc_node *n_ptr) void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr) { struct tipc_link **active; - u32 addr = n_ptr->addr; n_ptr->working_links--; - tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, l_ptr->bearer_id, addr); + n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN; + n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id; if (!tipc_link_is_active(l_ptr)) { pr_info("Lost standby link <%s> on network plane %c\n", @@ -552,28 +552,30 @@ void tipc_node_unlock(struct tipc_node *node) LIST_HEAD(conn_sks); struct sk_buff_head waiting_sks; u32 addr = 0; - unsigned int flags = node->action_flags; + int flags = node->action_flags; + u32 link_id = 0; - if (likely(!node->action_flags)) { + if (likely(!flags)) { spin_unlock_bh(&node->lock); return; } + addr = node->addr; + link_id = node->link_id; __skb_queue_head_init(&waiting_sks); - if (node->action_flags & TIPC_WAKEUP_USERS) { + + if (flags & TIPC_WAKEUP_USERS) skb_queue_splice_init(&node->waiting_sks, &waiting_sks); - node->action_flags &= ~TIPC_WAKEUP_USERS; - } - if (node->action_flags & TIPC_NOTIFY_NODE_DOWN) { + + if (flags & TIPC_NOTIFY_NODE_DOWN) { list_replace_init(&node->nsub, &nsub_list); list_replace_init(&node->conn_sks, &conn_sks); - node->action_flags &= ~TIPC_NOTIFY_NODE_DOWN; } - if (node->action_flags & TIPC_NOTIFY_NODE_UP) { - node->action_flags &= ~TIPC_NOTIFY_NODE_UP; - addr = node->addr; - } - node->action_flags &= ~TIPC_WAKEUP_BCAST_USERS; + node->action_flags &= ~(TIPC_WAKEUP_USERS | TIPC_NOTIFY_NODE_DOWN | + TIPC_NOTIFY_NODE_UP | TIPC_NOTIFY_LINK_UP | + TIPC_NOTIFY_LINK_DOWN | + TIPC_WAKEUP_BCAST_USERS); + spin_unlock_bh(&node->lock); while (!skb_queue_empty(&waiting_sks)) @@ -588,6 +590,14 @@ void tipc_node_unlock(struct tipc_node *node) if (flags & TIPC_WAKEUP_BCAST_USERS) tipc_bclink_wakeup_users(); - if (addr) + if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(addr); + + if (flags & TIPC_NOTIFY_LINK_UP) + tipc_nametbl_publish(TIPC_LINK_STATE, addr, addr, + TIPC_NODE_SCOPE, link_id, addr); + + if (flags & TIPC_NOTIFY_LINK_DOWN) + tipc_nametbl_withdraw(TIPC_LINK_STATE, addr, + link_id, addr); } diff --git a/net/tipc/node.h b/net/tipc/node.h index 67513c3..04e9145 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -53,6 +53,7 @@ * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down * TIPC_NOTIFY_NODE_DOWN: notify node is down * TIPC_NOTIFY_NODE_UP: notify node is up + * TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type */ enum { TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1), @@ -60,7 +61,9 @@ enum { TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), TIPC_WAKEUP_USERS = (1 << 5), - TIPC_WAKEUP_BCAST_USERS = (1 << 6) + TIPC_WAKEUP_BCAST_USERS = (1 << 6), + TIPC_NOTIFY_LINK_UP = (1 << 7), + TIPC_NOTIFY_LINK_DOWN = (1 << 8) }; /** @@ -100,6 +103,7 @@ struct tipc_node_bclink { * @working_links: number of working links to node (both active and standby) * @link_cnt: number of links to node * @signature: node instance identifier + * @link_id: local and remote bearer ids of changing link, if any * @nsub: list of "node down" subscriptions monitoring node * @rcu: rcu struct for tipc_node */ @@ -116,6 +120,7 @@ struct tipc_node { int link_cnt; int working_links; u32 signature; + u32 link_id; struct list_head nsub; struct sk_buff_head waiting_sks; struct list_head conn_sks; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 75275c5..51bddc2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1776,7 +1776,7 @@ int tipc_sk_rcv(struct sk_buff *buf) sk = &tsk->sk; /* Queue message */ - bh_lock_sock(sk); + spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user(sk)) { rc = filter_rcv(sk, buf); @@ -1787,7 +1787,7 @@ int tipc_sk_rcv(struct sk_buff *buf) if (sk_add_backlog(sk, buf, limit)) rc = -TIPC_ERR_OVERLOAD; } - bh_unlock_sock(sk); + spin_unlock_bh(&sk->sk_lock.slock); tipc_sk_put(tsk); if (likely(!rc)) return 0; @@ -2673,7 +2673,7 @@ static int tipc_ioctl(struct socket *sk, unsigned int cmd, unsigned long arg) case SIOCGETLINKNAME: if (copy_from_user(&lnr, argp, sizeof(lnr))) return -EFAULT; - if (!tipc_node_get_linkname(lnr.bearer_id, lnr.peer, + if (!tipc_node_get_linkname(lnr.bearer_id & 0xffff, lnr.peer, lnr.linkname, TIPC_MAX_LINK_NAME)) { if (copy_to_user(argp, &lnr, sizeof(lnr))) return -EFAULT; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 499d6c1..7c53285 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -157,6 +157,8 @@ static int xfrm_output_gso(struct sk_buff *skb) kfree_skb(skb); if (IS_ERR(segs)) return PTR_ERR(segs); + if (segs == NULL) + return -EINVAL; do { struct sk_buff *nskb = segs->next; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4c4e457..88bf289 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1962,7 +1962,7 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { kfree_skb(skb); return 0; } |