diff options
66 files changed, 524 insertions, 383 deletions
diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt index c6af4ba..54f1047 100644 --- a/Documentation/networking/netlink_mmap.txt +++ b/Documentation/networking/netlink_mmap.txt @@ -199,16 +199,9 @@ frame header. TX limitations -------------- -Kernel processing usually involves validation of the message received by -user-space, then processing its contents. The kernel must assure that -userspace is not able to modify the message contents after they have been -validated. In order to do so, the message is copied from the ring frame -to an allocated buffer if either of these conditions is false: - -- only a single mapping of the ring exists -- the file descriptor is not shared between processes - -This means that for threaded programs, the kernel will fall back to copying. +As of Jan 2015 the message is always copied from the ring frame to an +allocated buffer due to unresolved security concerns. +See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX."). Example ------- diff --git a/drivers/isdn/hardware/eicon/message.c b/drivers/isdn/hardware/eicon/message.c index 0b38060..d7c2866 100644 --- a/drivers/isdn/hardware/eicon/message.c +++ b/drivers/isdn/hardware/eicon/message.c @@ -1474,7 +1474,7 @@ static byte connect_res(dword Id, word Number, DIVA_CAPI_ADAPTER *a, add_ai(plci, &parms[5]); sig_req(plci, REJECT, 0); } - else if (Reject == 1 || Reject > 9) + else if (Reject == 1 || Reject >= 9) { add_ai(plci, &parms[5]); sig_req(plci, HANGUP, 0); diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 5e40a8b..b3b922a 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1415,7 +1415,6 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev, cfhsi = netdev_priv(dev); cfhsi_netlink_parms(data, cfhsi); - dev_net_set(cfhsi->ndev, src_net); get_ops = symbol_get(cfhsi_get_ops); if (!get_ops) { diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index 7a5e4aa..77f1f60 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -45,7 +45,7 @@ config AMD8111_ETH config LANCE tristate "AMD LANCE and PCnet (AT1500 and NE2100) support" - depends on ISA && ISA_DMA_API + depends on ISA && ISA_DMA_API && !ARM ---help--- If you have a network (Ethernet) card of this type, say Y and read the Ethernet-HOWTO, available from @@ -142,7 +142,7 @@ config PCMCIA_NMCLAN config NI65 tristate "NI6510 support" - depends on ISA && ISA_DMA_API + depends on ISA && ISA_DMA_API && !ARM ---help--- If you have a network (Ethernet) card of this type, say Y and read the Ethernet-HOWTO, available from diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c index 5b22764..27245ef 100644 --- a/drivers/net/ethernet/amd/nmclan_cs.c +++ b/drivers/net/ethernet/amd/nmclan_cs.c @@ -952,6 +952,8 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) do { /* WARNING: MACE_IR is a READ/CLEAR port! */ status = inb(ioaddr + AM2150_MACE_BASE + MACE_IR); + if (!(status & ~MACE_IMR_DEFAULT) && IntrCnt == MACE_MAX_IR_ITERATIONS) + return IRQ_NONE; pr_debug("mace_interrupt: irq 0x%X status 0x%X.\n", irq, status); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 7bb5f07..e5ffb2c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -523,6 +523,7 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) hw_feat->sph = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, SPHEN); hw_feat->tso = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, TSOEN); hw_feat->dma_debug = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, DBGMEMA); + hw_feat->rss = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, RSSEN); hw_feat->tc_cnt = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, NUMTC); hw_feat->hash_table_size = XGMAC_GET_BITS(mac_hfr1, MAC_HWF1R, HASHTBLSZ); @@ -552,13 +553,14 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) break; } - /* The Queue and Channel counts are zero based so increment them + /* The Queue, Channel and TC counts are zero based so increment them * to get the actual number */ hw_feat->rx_q_cnt++; hw_feat->tx_q_cnt++; hw_feat->rx_ch_cnt++; hw_feat->tx_ch_cnt++; + hw_feat->tc_cnt++; DBGPR("<--xgbe_get_all_hw_features\n"); } diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 83a5028..793f3b7 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -369,6 +369,8 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring, if (unlikely(xgene_enet_is_desc_slot_empty(raw_desc))) break; + /* read fpqnum field after dataaddr field */ + dma_rmb(); if (is_rx_desc(raw_desc)) ret = xgene_enet_rx_frame(ring, raw_desc); else diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig index 7403dff..905ac5f 100644 --- a/drivers/net/ethernet/cirrus/Kconfig +++ b/drivers/net/ethernet/cirrus/Kconfig @@ -32,7 +32,8 @@ config CS89x0 will be called cs89x0. config CS89x0_PLATFORM - bool "CS89x0 platform driver support" + bool "CS89x0 platform driver support" if HAS_IOPORT_MAP + default !HAS_IOPORT_MAP depends on CS89x0 help Say Y to compile the cs89x0 driver as a platform driver. This diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 3e1a9c1..fda12fb 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1586,7 +1586,7 @@ static int gfar_write_filer_table(struct gfar_private *priv, return -EBUSY; /* Fill regular entries */ - for (; i < MAX_FILER_IDX - 1 && (tab->fe[i].ctrl | tab->fe[i].ctrl); + for (; i < MAX_FILER_IDX - 1 && (tab->fe[i].ctrl | tab->fe[i].prop); i++) gfar_write_filer(priv, i, tab->fe[i].ctrl, tab->fe[i].prop); /* Fill the rest with fall-troughs */ diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 63c807c..edea13b 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1907,7 +1907,8 @@ static void igbvf_watchdog_task(struct work_struct *work) static int igbvf_tso(struct igbvf_adapter *adapter, struct igbvf_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, u8 *hdr_len) + struct sk_buff *skb, u32 tx_flags, u8 *hdr_len, + __be16 protocol) { struct e1000_adv_tx_context_desc *context_desc; struct igbvf_buffer *buffer_info; @@ -1927,7 +1928,7 @@ static int igbvf_tso(struct igbvf_adapter *adapter, l4len = tcp_hdrlen(skb); *hdr_len += l4len; - if (skb->protocol == htons(ETH_P_IP)) { + if (protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = 0; iph->check = 0; @@ -1958,7 +1959,7 @@ static int igbvf_tso(struct igbvf_adapter *adapter, /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); - if (skb->protocol == htons(ETH_P_IP)) + if (protocol == htons(ETH_P_IP)) tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; @@ -1984,7 +1985,8 @@ static int igbvf_tso(struct igbvf_adapter *adapter, static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter, struct igbvf_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags) + struct sk_buff *skb, u32 tx_flags, + __be16 protocol) { struct e1000_adv_tx_context_desc *context_desc; unsigned int i; @@ -2011,7 +2013,7 @@ static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter, tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); if (skb->ip_summed == CHECKSUM_PARTIAL) { - switch (skb->protocol) { + switch (protocol) { case htons(ETH_P_IP): tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; if (ip_hdr(skb)->protocol == IPPROTO_TCP) @@ -2211,6 +2213,7 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, u8 hdr_len = 0; int count = 0; int tso = 0; + __be16 protocol = vlan_get_protocol(skb); if (test_bit(__IGBVF_DOWN, &adapter->state)) { dev_kfree_skb_any(skb); @@ -2239,13 +2242,13 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, tx_flags |= (vlan_tx_tag_get(skb) << IGBVF_TX_FLAGS_VLAN_SHIFT); } - if (skb->protocol == htons(ETH_P_IP)) + if (protocol == htons(ETH_P_IP)) tx_flags |= IGBVF_TX_FLAGS_IPV4; first = tx_ring->next_to_use; tso = skb_is_gso(skb) ? - igbvf_tso(adapter, tx_ring, skb, tx_flags, &hdr_len) : 0; + igbvf_tso(adapter, tx_ring, skb, tx_flags, &hdr_len, protocol) : 0; if (unlikely(tso < 0)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -2253,7 +2256,7 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, if (tso) tx_flags |= IGBVF_TX_FLAGS_TSO; - else if (igbvf_tx_csum(adapter, tx_ring, skb, tx_flags) && + else if (igbvf_tx_csum(adapter, tx_ring, skb, tx_flags, protocol) && (skb->ip_summed == CHECKSUM_PARTIAL)) tx_flags |= IGBVF_TX_FLAGS_CSUM; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2ed2c7d..67b02bd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7227,11 +7227,11 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (!vhdr) goto out_drop; - protocol = vhdr->h_vlan_encapsulated_proto; tx_flags |= ntohs(vhdr->h_vlan_TCI) << IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_SW_VLAN; } + protocol = vlan_get_protocol(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && adapter->ptp_clock && diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 62a0d8e..38c7a0b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3099,7 +3099,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP; - if (skb->protocol == htons(ETH_P_IP)) { + if (first->protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = 0; iph->check = 0; @@ -3156,7 +3156,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, if (skb->ip_summed == CHECKSUM_PARTIAL) { u8 l4_hdr = 0; - switch (skb->protocol) { + switch (first->protocol) { case htons(ETH_P_IP): vlan_macip_lens |= skb_network_header_len(skb); type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index bdd4eea..210691c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -235,7 +235,8 @@ do { \ extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; -#define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) +#define MLX4_MAX_NUM_SLAVES (min(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF, \ + MLX4_MFUNC_MAX)) #define ALL_SLAVES 0xff struct mlx4_bitmap { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 18e5de7..4e1f58c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -967,7 +967,12 @@ static int qlcnic_poll(struct napi_struct *napi, int budget) tx_complete = qlcnic_process_cmd_ring(adapter, tx_ring, budget); work_done = qlcnic_process_rcv_ring(sds_ring, budget); - if ((work_done < budget) && tx_complete) { + + /* Check if we need a repoll */ + if (!tx_complete) + work_done = budget; + + if (work_done < budget) { napi_complete(&sds_ring->napi); if (test_bit(__QLCNIC_DEV_UP, &adapter->state)) { qlcnic_enable_sds_intr(adapter, sds_ring); @@ -992,6 +997,9 @@ static int qlcnic_tx_poll(struct napi_struct *napi, int budget) napi_complete(&tx_ring->napi); if (test_bit(__QLCNIC_DEV_UP, &adapter->state)) qlcnic_enable_tx_intr(adapter, tx_ring); + } else { + /* As qlcnic_process_cmd_ring() returned 0, we need a repoll*/ + work_done = budget; } return work_done; @@ -1950,7 +1958,12 @@ static int qlcnic_83xx_msix_sriov_vf_poll(struct napi_struct *napi, int budget) tx_complete = qlcnic_process_cmd_ring(adapter, tx_ring, budget); work_done = qlcnic_83xx_process_rcv_ring(sds_ring, budget); - if ((work_done < budget) && tx_complete) { + + /* Check if we need a repoll */ + if (!tx_complete) + work_done = budget; + + if (work_done < budget) { napi_complete(&sds_ring->napi); qlcnic_enable_sds_intr(adapter, sds_ring); } @@ -1973,7 +1986,12 @@ static int qlcnic_83xx_poll(struct napi_struct *napi, int budget) tx_complete = qlcnic_process_cmd_ring(adapter, tx_ring, budget); work_done = qlcnic_83xx_process_rcv_ring(sds_ring, budget); - if ((work_done < budget) && tx_complete) { + + /* Check if we need a repoll */ + if (!tx_complete) + work_done = budget; + + if (work_done < budget) { napi_complete(&sds_ring->napi); qlcnic_enable_sds_intr(adapter, sds_ring); } @@ -1995,6 +2013,9 @@ static int qlcnic_83xx_msix_tx_poll(struct napi_struct *napi, int budget) napi_complete(&tx_ring->napi); if (test_bit(__QLCNIC_DEV_UP , &adapter->state)) qlcnic_enable_tx_intr(adapter, tx_ring); + } else { + /* need a repoll */ + work_done = budget; } return work_done; diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 6c904a6..ef5aed3 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2351,23 +2351,29 @@ static int qlge_update_hw_vlan_features(struct net_device *ndev, { struct ql_adapter *qdev = netdev_priv(ndev); int status = 0; + bool need_restart = netif_running(ndev); - status = ql_adapter_down(qdev); - if (status) { - netif_err(qdev, link, qdev->ndev, - "Failed to bring down the adapter\n"); - return status; + if (need_restart) { + status = ql_adapter_down(qdev); + if (status) { + netif_err(qdev, link, qdev->ndev, + "Failed to bring down the adapter\n"); + return status; + } } /* update the features with resent change */ ndev->features = features; - status = ql_adapter_up(qdev); - if (status) { - netif_err(qdev, link, qdev->ndev, - "Failed to bring up the adapter\n"); - return status; + if (need_restart) { + status = ql_adapter_up(qdev); + if (status) { + netif_err(qdev, link, qdev->ndev, + "Failed to bring up the adapter\n"); + return status; + } } + return status; } diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index d2835bf..3699b98 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1119,6 +1119,7 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) skb_shinfo(nskb)->gso_size = skb_shinfo(skb)->gso_size; skb_shinfo(nskb)->gso_type = skb_shinfo(skb)->gso_type; } + nskb->queue_mapping = skb->queue_mapping; dev_kfree_skb(skb); skb = nskb; } diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 9f49c01..7cd4eb3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -716,7 +716,7 @@ int netvsc_send(struct hv_device *device, u64 req_id; unsigned int section_index = NETVSC_INVALID_INDEX; u32 msg_size = 0; - struct sk_buff *skb; + struct sk_buff *skb = NULL; u16 q_idx = packet->q_idx; @@ -743,8 +743,6 @@ int netvsc_send(struct hv_device *device, packet); skb = (struct sk_buff *) (unsigned long)packet->send_completion_tid; - if (skb) - dev_kfree_skb_any(skb); packet->page_buf_cnt = 0; } } @@ -810,6 +808,13 @@ int netvsc_send(struct hv_device *device, packet, ret); } + if (ret != 0) { + if (section_index != NETVSC_INVALID_INDEX) + netvsc_free_send_slot(net_device, section_index); + } else if (skb) { + dev_kfree_skb_any(skb); + } + return ret; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 7df2217..919f4fc 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -17,7 +17,6 @@ #include <linux/fs.h> #include <linux/uio.h> -#include <net/ipv6.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <net/sock.h> @@ -81,7 +80,7 @@ static struct cdev macvtap_cdev; static const struct proto_ops macvtap_socket_ops; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ - NETIF_F_TSO6) + NETIF_F_TSO6 | NETIF_F_UFO) #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG) @@ -586,11 +585,7 @@ static int macvtap_skb_from_vnet_hdr(struct macvtap_queue *q, gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: - pr_warn_once("macvtap: %s: using disabled UFO feature; please fix this program\n", - current->comm); gso_type = SKB_GSO_UDP; - if (skb->protocol == htons(ETH_P_IPV6)) - ipv6_proxy_select_ident(skb); break; default: return -EINVAL; @@ -636,6 +631,8 @@ static void macvtap_skb_to_vnet_hdr(struct macvtap_queue *q, vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (sinfo->gso_type & SKB_GSO_TCP_ECN) @@ -965,6 +962,9 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) if (arg & TUN_F_TSO6) feature_mask |= NETIF_F_TSO6; } + + if (arg & TUN_F_UFO) + feature_mask |= NETIF_F_UFO; } /* tun/tap driver inverts the usage for TSO offloads, where @@ -975,7 +975,7 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ - if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6)) + if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO)) features |= RX_OFFLOADS; else features &= ~RX_OFFLOADS; @@ -1090,7 +1090,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | - TUN_F_TSO_ECN)) + TUN_F_TSO_ECN | TUN_F_UFO)) return -EINVAL; rtnl_lock(); diff --git a/drivers/net/ppp/ppp_deflate.c b/drivers/net/ppp/ppp_deflate.c index 602c625..b5edc7f 100644 --- a/drivers/net/ppp/ppp_deflate.c +++ b/drivers/net/ppp/ppp_deflate.c @@ -246,7 +246,7 @@ static int z_compress(void *arg, unsigned char *rptr, unsigned char *obuf, /* * See if we managed to reduce the size of the packet. */ - if (olen < isize) { + if (olen < isize && olen <= osize) { state->stats.comp_bytes += olen; state->stats.comp_packets++; } else { diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8c8dc16..10f9e40 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -65,7 +65,6 @@ #include <linux/nsproxy.h> #include <linux/virtio_net.h> #include <linux/rcupdate.h> -#include <net/ipv6.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> @@ -187,7 +186,7 @@ struct tun_struct { struct net_device *dev; netdev_features_t set_features; #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ - NETIF_F_TSO6) + NETIF_F_TSO6|NETIF_F_UFO) int vnet_hdr_sz; int sndbuf; @@ -1167,8 +1166,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; } - skb_reset_network_header(skb); - if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -1179,20 +1176,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; case VIRTIO_NET_HDR_GSO_UDP: - { - static bool warned; - - if (!warned) { - warned = true; - netdev_warn(tun->dev, - "%s: using disabled UFO feature; please fix this program\n", - current->comm); - } skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - if (skb->protocol == htons(ETH_P_IPV6)) - ipv6_proxy_select_ident(skb); break; - } default: tun->dev->stats.rx_frame_errors++; kfree_skb(skb); @@ -1221,6 +1206,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } + skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); rxhash = skb_get_hash(skb); @@ -1298,6 +1284,8 @@ static ssize_t tun_put_user(struct tun_struct *tun, gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + gso.gso_type = VIRTIO_NET_HDR_GSO_UDP; else { pr_err("unexpected GSO type: " "0x%x, gso_size %d, hdr_len %d\n", @@ -1746,6 +1734,11 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) features |= NETIF_F_TSO6; arg &= ~(TUN_F_TSO4|TUN_F_TSO6); } + + if (arg & TUN_F_UFO) { + features |= NETIF_F_UFO; + arg &= ~TUN_F_UFO; + } } /* This gives the user a way to test for new features in future by diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 99b69af..4a1e9c4 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -77,7 +77,7 @@ static int wait_phy_eeprom_ready(struct usbnet *dev, int phy) int ret; udelay(1); - ret = sr_read_reg(dev, EPCR, &tmp); + ret = sr_read_reg(dev, SR_EPCR, &tmp); if (ret < 0) return ret; @@ -98,15 +98,15 @@ static int sr_share_read_word(struct usbnet *dev, int phy, u8 reg, mutex_lock(&dev->phy_mutex); - sr_write_reg(dev, EPAR, phy ? (reg | EPAR_PHY_ADR) : reg); - sr_write_reg(dev, EPCR, phy ? (EPCR_EPOS | EPCR_ERPRR) : EPCR_ERPRR); + sr_write_reg(dev, SR_EPAR, phy ? (reg | EPAR_PHY_ADR) : reg); + sr_write_reg(dev, SR_EPCR, phy ? (EPCR_EPOS | EPCR_ERPRR) : EPCR_ERPRR); ret = wait_phy_eeprom_ready(dev, phy); if (ret < 0) goto out_unlock; - sr_write_reg(dev, EPCR, 0x0); - ret = sr_read(dev, EPDR, 2, value); + sr_write_reg(dev, SR_EPCR, 0x0); + ret = sr_read(dev, SR_EPDR, 2, value); netdev_dbg(dev->net, "read shared %d 0x%02x returned 0x%04x, %d\n", phy, reg, *value, ret); @@ -123,19 +123,19 @@ static int sr_share_write_word(struct usbnet *dev, int phy, u8 reg, mutex_lock(&dev->phy_mutex); - ret = sr_write(dev, EPDR, 2, &value); + ret = sr_write(dev, SR_EPDR, 2, &value); if (ret < 0) goto out_unlock; - sr_write_reg(dev, EPAR, phy ? (reg | EPAR_PHY_ADR) : reg); - sr_write_reg(dev, EPCR, phy ? (EPCR_WEP | EPCR_EPOS | EPCR_ERPRW) : + sr_write_reg(dev, SR_EPAR, phy ? (reg | EPAR_PHY_ADR) : reg); + sr_write_reg(dev, SR_EPCR, phy ? (EPCR_WEP | EPCR_EPOS | EPCR_ERPRW) : (EPCR_WEP | EPCR_ERPRW)); ret = wait_phy_eeprom_ready(dev, phy); if (ret < 0) goto out_unlock; - sr_write_reg(dev, EPCR, 0x0); + sr_write_reg(dev, SR_EPCR, 0x0); out_unlock: mutex_unlock(&dev->phy_mutex); @@ -188,7 +188,7 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) if (loc == MII_BMSR) { u8 value; - sr_read_reg(dev, NSR, &value); + sr_read_reg(dev, SR_NSR, &value); if (value & NSR_LINKST) rc = 1; } @@ -228,7 +228,7 @@ static u32 sr9700_get_link(struct net_device *netdev) int rc = 0; /* Get the Link Status directly */ - sr_read_reg(dev, NSR, &value); + sr_read_reg(dev, SR_NSR, &value); if (value & NSR_LINKST) rc = 1; @@ -281,8 +281,8 @@ static void sr9700_set_multicast(struct net_device *netdev) } } - sr_write_async(dev, MAR, SR_MCAST_SIZE, hashes); - sr_write_reg_async(dev, RCR, rx_ctl); + sr_write_async(dev, SR_MAR, SR_MCAST_SIZE, hashes); + sr_write_reg_async(dev, SR_RCR, rx_ctl); } static int sr9700_set_mac_address(struct net_device *netdev, void *p) @@ -297,7 +297,7 @@ static int sr9700_set_mac_address(struct net_device *netdev, void *p) } memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - sr_write_async(dev, PAR, 6, netdev->dev_addr); + sr_write_async(dev, SR_PAR, 6, netdev->dev_addr); return 0; } @@ -340,7 +340,7 @@ static int sr9700_bind(struct usbnet *dev, struct usb_interface *intf) mii->phy_id_mask = 0x1f; mii->reg_num_mask = 0x1f; - sr_write_reg(dev, NCR, NCR_RST); + sr_write_reg(dev, SR_NCR, NCR_RST); udelay(20); /* read MAC @@ -348,17 +348,17 @@ static int sr9700_bind(struct usbnet *dev, struct usb_interface *intf) * EEPROM automatically to PAR. In case there is no EEPROM externally, * a default MAC address is stored in PAR for making chip work properly. */ - if (sr_read(dev, PAR, ETH_ALEN, netdev->dev_addr) < 0) { + if (sr_read(dev, SR_PAR, ETH_ALEN, netdev->dev_addr) < 0) { netdev_err(netdev, "Error reading MAC address\n"); ret = -ENODEV; goto out; } /* power up and reset phy */ - sr_write_reg(dev, PRR, PRR_PHY_RST); + sr_write_reg(dev, SR_PRR, PRR_PHY_RST); /* at least 10ms, here 20ms for safe */ mdelay(20); - sr_write_reg(dev, PRR, 0); + sr_write_reg(dev, SR_PRR, 0); /* at least 1ms, here 2ms for reading right register */ udelay(2 * 1000); diff --git a/drivers/net/usb/sr9700.h b/drivers/net/usb/sr9700.h index fd687c5..258b030 100644 --- a/drivers/net/usb/sr9700.h +++ b/drivers/net/usb/sr9700.h @@ -14,13 +14,13 @@ /* sr9700 spec. register table on Linux platform */ /* Network Control Reg */ -#define NCR 0x00 +#define SR_NCR 0x00 #define NCR_RST (1 << 0) #define NCR_LBK (3 << 1) #define NCR_FDX (1 << 3) #define NCR_WAKEEN (1 << 6) /* Network Status Reg */ -#define NSR 0x01 +#define SR_NSR 0x01 #define NSR_RXRDY (1 << 0) #define NSR_RXOV (1 << 1) #define NSR_TX1END (1 << 2) @@ -30,7 +30,7 @@ #define NSR_LINKST (1 << 6) #define NSR_SPEED (1 << 7) /* Tx Control Reg */ -#define TCR 0x02 +#define SR_TCR 0x02 #define TCR_CRC_DIS (1 << 1) #define TCR_PAD_DIS (1 << 2) #define TCR_LC_CARE (1 << 3) @@ -38,7 +38,7 @@ #define TCR_EXCECM (1 << 5) #define TCR_LF_EN (1 << 6) /* Tx Status Reg for Packet Index 1 */ -#define TSR1 0x03 +#define SR_TSR1 0x03 #define TSR1_EC (1 << 2) #define TSR1_COL (1 << 3) #define TSR1_LC (1 << 4) @@ -46,7 +46,7 @@ #define TSR1_LOC (1 << 6) #define TSR1_TLF (1 << 7) /* Tx Status Reg for Packet Index 2 */ -#define TSR2 0x04 +#define SR_TSR2 0x04 #define TSR2_EC (1 << 2) #define TSR2_COL (1 << 3) #define TSR2_LC (1 << 4) @@ -54,7 +54,7 @@ #define TSR2_LOC (1 << 6) #define TSR2_TLF (1 << 7) /* Rx Control Reg*/ -#define RCR 0x05 +#define SR_RCR 0x05 #define RCR_RXEN (1 << 0) #define RCR_PRMSC (1 << 1) #define RCR_RUNT (1 << 2) @@ -62,87 +62,87 @@ #define RCR_DIS_CRC (1 << 4) #define RCR_DIS_LONG (1 << 5) /* Rx Status Reg */ -#define RSR 0x06 +#define SR_RSR 0x06 #define RSR_AE (1 << 2) #define RSR_MF (1 << 6) #define RSR_RF (1 << 7) /* Rx Overflow Counter Reg */ -#define ROCR 0x07 +#define SR_ROCR 0x07 #define ROCR_ROC (0x7F << 0) #define ROCR_RXFU (1 << 7) /* Back Pressure Threshold Reg */ -#define BPTR 0x08 +#define SR_BPTR 0x08 #define BPTR_JPT (0x0F << 0) #define BPTR_BPHW (0x0F << 4) /* Flow Control Threshold Reg */ -#define FCTR 0x09 +#define SR_FCTR 0x09 #define FCTR_LWOT (0x0F << 0) #define FCTR_HWOT (0x0F << 4) /* rx/tx Flow Control Reg */ -#define FCR 0x0A +#define SR_FCR 0x0A #define FCR_FLCE (1 << 0) #define FCR_BKPA (1 << 4) #define FCR_TXPEN (1 << 5) #define FCR_TXPF (1 << 6) #define FCR_TXP0 (1 << 7) /* Eeprom & Phy Control Reg */ -#define EPCR 0x0B +#define SR_EPCR 0x0B #define EPCR_ERRE (1 << 0) #define EPCR_ERPRW (1 << 1) #define EPCR_ERPRR (1 << 2) #define EPCR_EPOS (1 << 3) #define EPCR_WEP (1 << 4) /* Eeprom & Phy Address Reg */ -#define EPAR 0x0C +#define SR_EPAR 0x0C #define EPAR_EROA (0x3F << 0) #define EPAR_PHY_ADR_MASK (0x03 << 6) #define EPAR_PHY_ADR (0x01 << 6) /* Eeprom & Phy Data Reg */ -#define EPDR 0x0D /* 0x0D ~ 0x0E for Data Reg Low & High */ +#define SR_EPDR 0x0D /* 0x0D ~ 0x0E for Data Reg Low & High */ /* Wakeup Control Reg */ -#define WCR 0x0F +#define SR_WCR 0x0F #define WCR_MAGICST (1 << 0) #define WCR_LINKST (1 << 2) #define WCR_MAGICEN (1 << 3) #define WCR_LINKEN (1 << 5) /* Physical Address Reg */ -#define PAR 0x10 /* 0x10 ~ 0x15 6 bytes for PAR */ +#define SR_PAR 0x10 /* 0x10 ~ 0x15 6 bytes for PAR */ /* Multicast Address Reg */ -#define MAR 0x16 /* 0x16 ~ 0x1D 8 bytes for MAR */ +#define SR_MAR 0x16 /* 0x16 ~ 0x1D 8 bytes for MAR */ /* 0x1e unused */ /* Phy Reset Reg */ -#define PRR 0x1F +#define SR_PRR 0x1F #define PRR_PHY_RST (1 << 0) /* Tx sdram Write Pointer Address Low */ -#define TWPAL 0x20 +#define SR_TWPAL 0x20 /* Tx sdram Write Pointer Address High */ -#define TWPAH 0x21 +#define SR_TWPAH 0x21 /* Tx sdram Read Pointer Address Low */ -#define TRPAL 0x22 +#define SR_TRPAL 0x22 /* Tx sdram Read Pointer Address High */ -#define TRPAH 0x23 +#define SR_TRPAH 0x23 /* Rx sdram Write Pointer Address Low */ -#define RWPAL 0x24 +#define SR_RWPAL 0x24 /* Rx sdram Write Pointer Address High */ -#define RWPAH 0x25 +#define SR_RWPAH 0x25 /* Rx sdram Read Pointer Address Low */ -#define RRPAL 0x26 +#define SR_RRPAL 0x26 /* Rx sdram Read Pointer Address High */ -#define RRPAH 0x27 +#define SR_RRPAH 0x27 /* Vendor ID register */ -#define VID 0x28 /* 0x28 ~ 0x29 2 bytes for VID */ +#define SR_VID 0x28 /* 0x28 ~ 0x29 2 bytes for VID */ /* Product ID register */ -#define PID 0x2A /* 0x2A ~ 0x2B 2 bytes for PID */ +#define SR_PID 0x2A /* 0x2A ~ 0x2B 2 bytes for PID */ /* CHIP Revision register */ -#define CHIPR 0x2C +#define SR_CHIPR 0x2C /* 0x2D --> 0xEF unused */ /* USB Device Address */ -#define USBDA 0xF0 +#define SR_USBDA 0xF0 #define USBDA_USBFA (0x7F << 0) /* RX packet Counter Reg */ -#define RXC 0xF1 +#define SR_RXC 0xF1 /* Tx packet Counter & USB Status Reg */ -#define TXC_USBS 0xF2 +#define SR_TXC_USBS 0xF2 #define TXC_USBS_TXC0 (1 << 0) #define TXC_USBS_TXC1 (1 << 1) #define TXC_USBS_TXC2 (1 << 2) @@ -150,7 +150,7 @@ #define TXC_USBS_SUSFLAG (1 << 6) #define TXC_USBS_RXFAULT (1 << 7) /* USB Control register */ -#define USBC 0xF4 +#define SR_USBC 0xF4 #define USBC_EP3NAK (1 << 4) #define USBC_EP3ACK (1 << 5) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5ca9771..059fdf1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -490,17 +490,8 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; break; case VIRTIO_NET_HDR_GSO_UDP: - { - static bool warned; - - if (!warned) { - warned = true; - netdev_warn(dev, - "host using disabled UFO feature; please fix it\n"); - } skb_shinfo(skb)->gso_type = SKB_GSO_UDP; break; - } case VIRTIO_NET_HDR_GSO_TCPV6: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; @@ -888,6 +879,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN) @@ -1748,7 +1741,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { - dev->hw_features |= NETIF_F_TSO + dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO | NETIF_F_TSO_ECN | NETIF_F_TSO6; } /* Individual feature bits: what can host handle? */ @@ -1758,9 +1751,11 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_TSO6; if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) dev->hw_features |= NETIF_F_TSO_ECN; + if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO)) + dev->hw_features |= NETIF_F_UFO; if (gso) - dev->features |= dev->hw_features & NETIF_F_ALL_TSO; + dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO); /* (!csum && gso) case will be fixed by register_netdev() */ } if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM)) @@ -1798,7 +1793,8 @@ static int virtnet_probe(struct virtio_device *vdev) /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) || + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO)) vi->big_packets = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) @@ -1994,9 +1990,9 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, - VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, - VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7fbd89f..a8c755d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2432,10 +2432,10 @@ static void vxlan_sock_work(struct work_struct *work) dev_put(vxlan->dev); } -static int vxlan_newlink(struct net *net, struct net_device *dev, +static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_rdst *dst = &vxlan->default_dst; __u32 vni; @@ -2445,7 +2445,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (!data[IFLA_VXLAN_ID]) return -EINVAL; - vxlan->net = dev_net(dev); + vxlan->net = src_net; vni = nla_get_u32(data[IFLA_VXLAN_ID]); dst->remote_vni = vni; @@ -2481,7 +2481,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (data[IFLA_VXLAN_LINK] && (dst->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]))) { struct net_device *lowerdev - = __dev_get_by_index(net, dst->remote_ifindex); + = __dev_get_by_index(src_net, dst->remote_ifindex); if (!lowerdev) { pr_info("ifindex %d does not exist\n", dst->remote_ifindex); @@ -2557,7 +2557,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; - if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, + if (vxlan_find_vni(src_net, vni, use_ipv6 ? AF_INET6 : AF_INET, vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 94e2349..a2fdd15 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -25,7 +25,7 @@ if WAN # There is no way to detect a comtrol sv11 - force it modular for now. config HOSTESS_SV11 tristate "Comtrol Hostess SV-11 support" - depends on ISA && m && ISA_DMA_API && INET && HDLC + depends on ISA && m && ISA_DMA_API && INET && HDLC && VIRT_TO_BUS help Driver for Comtrol Hostess SV-11 network card which operates on low speed synchronous serial links at up to @@ -37,7 +37,7 @@ config HOSTESS_SV11 # The COSA/SRP driver has not been tested as non-modular yet. config COSA tristate "COSA/SRP sync serial boards support" - depends on ISA && m && ISA_DMA_API && HDLC + depends on ISA && m && ISA_DMA_API && HDLC && VIRT_TO_BUS ---help--- Driver for COSA and SRP synchronous serial boards. @@ -87,7 +87,7 @@ config LANMEDIA # There is no way to detect a Sealevel board. Force it modular config SEALEVEL_4021 tristate "Sealevel Systems 4021 support" - depends on ISA && m && ISA_DMA_API && INET && HDLC + depends on ISA && m && ISA_DMA_API && INET && HDLC && VIRT_TO_BUS help This is a driver for the Sealevel Systems ACB 56 serial I/O adapter. diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 9259a73..037f74f 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -578,6 +578,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref, goto err_rx_unbind; } queue->task = task; + get_task_struct(task); task = kthread_create(xenvif_dealloc_kthread, (void *)queue, "%s-dealloc", queue->name); @@ -634,6 +635,7 @@ void xenvif_disconnect(struct xenvif *vif) if (queue->task) { kthread_stop(queue->task); + put_task_struct(queue->task); queue->task = NULL; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 908e65e..c8ce701 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -2109,8 +2109,7 @@ int xenvif_kthread_guest_rx(void *data) */ if (unlikely(vif->disabled && queue->id == 0)) { xenvif_carrier_off(vif); - xenvif_rx_queue_purge(queue); - continue; + break; } if (!skb_queue_empty(&queue->rx_queue)) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d415d69..9484d56 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -650,8 +650,10 @@ static void handle_rx(struct vhost_net *net) break; } /* TODO: Should check and handle checksum. */ + + hdr.num_buffers = cpu_to_vhost16(vq, headcount); if (likely(mergeable) && - memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount, + memcpy_toiovecend(nvq->hdr, (void *)&hdr.num_buffers, offsetof(typeof(hdr), num_buffers), sizeof hdr.num_buffers)) { vq_err(vq, "Failed num_buffers write"); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 515a35e2..960e666 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -472,27 +472,59 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) /** * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query + * @type: first vlan protocol + * @depth: buffer to store length of eth and vlan tags in bytes * * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(const struct sk_buff *skb) +static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, + int *depth) { - __be16 protocol = 0; - - if (vlan_tx_tag_present(skb) || - skb->protocol != cpu_to_be16(ETH_P_8021Q)) - protocol = skb->protocol; - else { - __be16 proto, *protop; - protop = skb_header_pointer(skb, offsetof(struct vlan_ethhdr, - h_vlan_encapsulated_proto), - sizeof(proto), &proto); - if (likely(protop)) - protocol = *protop; + unsigned int vlan_depth = skb->mac_len; + + /* if type is 802.1Q/AD then the header should already be + * present at mac_len - VLAN_HLEN (if mac_len > 0), or at + * ETH_HLEN otherwise + */ + if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (vlan_depth) { + if (WARN_ON(vlan_depth < VLAN_HLEN)) + return 0; + vlan_depth -= VLAN_HLEN; + } else { + vlan_depth = ETH_HLEN; + } + do { + struct vlan_hdr *vh; + + if (unlikely(!pskb_may_pull(skb, + vlan_depth + VLAN_HLEN))) + return 0; + + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; + } while (type == htons(ETH_P_8021Q) || + type == htons(ETH_P_8021AD)); } - return protocol; + if (depth) + *depth = vlan_depth; + + return type; +} + +/** + * vlan_get_protocol - get protocol EtherType. + * @skb: skbuff to query + * + * Returns the EtherType of the packet, regardless of whether it is + * vlan encapsulated (normal or hardware accelerated) or not. + */ +static inline __be16 vlan_get_protocol(struct sk_buff *skb) +{ + return __vlan_get_protocol(skb, skb->protocol, NULL); } static inline void vlan_set_encap_proto(struct sk_buff *skb, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 25c791e..5f3a9aa 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -97,7 +97,7 @@ enum { MLX4_MAX_NUM_PF = 16, MLX4_MAX_NUM_VF = 126, MLX4_MAX_NUM_VF_P_PORT = 64, - MLX4_MFUNC_MAX = 80, + MLX4_MFUNC_MAX = 128, MLX4_MAX_EQ_NUM = 1024, MLX4_MFUNC_EQ_NUM = 4, MLX4_MFUNC_MAX_EQES = 8, diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 7ee2df0..dc8fd81 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -22,9 +22,9 @@ struct flow_keys { __be32 ports; __be16 port16[2]; }; - u16 thoff; - u16 n_proto; - u8 ip_proto; + u16 thoff; + __be16 n_proto; + u8 ip_proto; }; bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, diff --git a/include/net/ip.h b/include/net/ip.h index f7cbd70..09cf5ae 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -181,7 +181,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; } -void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4292929..6e416f6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,6 +671,9 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } +u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, + struct in6_addr *src); +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); void ipv6_proxy_select_ident(struct sk_buff *skb); int ip6_dst_hoplimit(struct dst_entry *dst); @@ -708,7 +711,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, __be32 flowlabel, bool autolabel) { if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) { - __be32 hash; + u32 hash; hash = skb_get_hash(skb); @@ -718,7 +721,7 @@ static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb, */ hash ^= hash >> 12; - flowlabel = hash & IPV6_FLOWLABEL_MASK; + flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK; } return flowlabel; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3ae969e..9eaaa78 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -530,6 +530,8 @@ enum nft_chain_type { int nft_chain_validate_dependency(const struct nft_chain *chain, enum nft_chain_type type); +int nft_chain_validate_hooks(const struct nft_chain *chain, + unsigned int hook_flags); struct nft_stats { u64 bytes; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 24945ce..0ffef1a 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -52,6 +52,7 @@ struct netns_ipv4 { struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_log; + struct sock * __percpu *tcp_sk; struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3d282cb..c605d30 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -79,6 +79,9 @@ struct Qdisc { struct netdev_queue *dev_queue; struct gnet_stats_rate_est64 rate_est; + struct gnet_stats_basic_cpu __percpu *cpu_bstats; + struct gnet_stats_queue __percpu *cpu_qstats; + struct Qdisc *next_sched; struct sk_buff *gso_skb; /* @@ -86,15 +89,9 @@ struct Qdisc { */ unsigned long state; struct sk_buff_head q; - union { - struct gnet_stats_basic_packed bstats; - struct gnet_stats_basic_cpu __percpu *cpu_bstats; - } __packed; + struct gnet_stats_basic_packed bstats; unsigned int __state; - union { - struct gnet_stats_queue qstats; - struct gnet_stats_queue __percpu *cpu_qstats; - } __packed; + struct gnet_stats_queue qstats; struct rcu_head rcu_head; int padded; atomic_t refcnt; diff --git a/include/net/tcp.h b/include/net/tcp.h index f50f29faf..9d9111e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -834,8 +834,8 @@ void tcp_get_available_congestion_control(char *buf, size_t len); void tcp_get_allowed_congestion_control(char *buf, size_t len); int tcp_set_allowed_congestion_control(char *allowed); int tcp_set_congestion_control(struct sock *sk, const char *name); -void tcp_slow_start(struct tcp_sock *tp, u32 acked); -void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w); +u32 tcp_slow_start(struct tcp_sock *tp, u32 acked); +void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked); u32 tcp_reno_ssthresh(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); diff --git a/lib/checksum.c b/lib/checksum.c index 129775e..8b39e86 100644 --- a/lib/checksum.c +++ b/lib/checksum.c @@ -181,6 +181,15 @@ csum_partial_copy(const void *src, void *dst, int len, __wsum sum) EXPORT_SYMBOL(csum_partial_copy); #ifndef csum_tcpudp_nofold +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, @@ -195,8 +204,7 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, #else s += (proto + len) << 8; #endif - s += (s >> 32); - return (__force __wsum)s; + return (__force __wsum)from64to32(s); } EXPORT_SYMBOL(csum_tcpudp_nofold); #endif diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index b0330ae..3244aea 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -265,22 +265,12 @@ out: data[NFT_REG_VERDICT].verdict = NF_DROP; } -static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { - struct nft_base_chain *basechain; - - if (chain->flags & NFT_BASE_CHAIN) { - basechain = nft_base_chain(chain); - - switch (basechain->ops[0].hooknum) { - case NF_BR_PRE_ROUTING: - case NF_BR_LOCAL_IN: - break; - default: - return -EOPNOTSUPP; - } - } - return 0; + return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN)); } static int nft_reject_bridge_init(const struct nft_ctx *ctx, @@ -290,7 +280,7 @@ static int nft_reject_bridge_init(const struct nft_ctx *ctx, struct nft_reject *priv = nft_expr_priv(expr); int icmp_code, err; - err = nft_reject_bridge_validate_hooks(ctx->chain); + err = nft_reject_bridge_validate(ctx, expr, NULL); if (err < 0) return err; @@ -341,13 +331,6 @@ nla_put_failure: return -1; } -static int nft_reject_bridge_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_reject_bridge_validate_hooks(ctx->chain); -} - static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 4589ff67..67a4a36 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -470,7 +470,6 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); - dev_net_set(caifdev->netdev, src_net); ret = register_netdevice(dev); if (ret) diff --git a/net/core/dev.c b/net/core/dev.c index 171420e..7fe8292 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2352,7 +2352,6 @@ EXPORT_SYMBOL(skb_checksum_help); __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { - unsigned int vlan_depth = skb->mac_len; __be16 type = skb->protocol; /* Tunnel gso handlers can set protocol to ethernet. */ @@ -2366,35 +2365,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) type = eth->h_proto; } - /* if skb->protocol is 802.1Q/AD then the header should already be - * present at mac_len - VLAN_HLEN (if mac_len > 0), or at - * ETH_HLEN otherwise - */ - if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { - if (vlan_depth) { - if (WARN_ON(vlan_depth < VLAN_HLEN)) - return 0; - vlan_depth -= VLAN_HLEN; - } else { - vlan_depth = ETH_HLEN; - } - do { - struct vlan_hdr *vh; - - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) - return 0; - - vh = (struct vlan_hdr *)(skb->data + vlan_depth); - type = vh->h_vlan_encapsulated_proto; - vlan_depth += VLAN_HLEN; - } while (type == htons(ETH_P_8021Q) || - type == htons(ETH_P_8021AD)); - } - - *depth = vlan_depth; - - return type; + return __vlan_get_protocol(skb, type, depth); } /** @@ -5323,7 +5294,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); -void netdev_adjacent_add_links(struct net_device *dev) +static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -5348,7 +5319,7 @@ void netdev_adjacent_add_links(struct net_device *dev) } } -void netdev_adjacent_del_links(struct net_device *dev) +static void netdev_adjacent_del_links(struct net_device *dev) { struct netdev_adjacent *iter; @@ -6656,7 +6627,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) if (!queue) return NULL; netdev_init_one_queue(dev, queue, NULL); - queue->qdisc = &noop_qdisc; + RCU_INIT_POINTER(queue->qdisc, &noop_qdisc); queue->qdisc_sleeping = &noop_qdisc; rcu_assign_pointer(dev->ingress_queue, queue); #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9cf6fe9..446cbaf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2895,12 +2895,16 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags) goto errout; } + if (!skb->len) + goto errout; + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; errout: WARN_ON(err == -EMSGSIZE); kfree_skb(skb); - rtnl_set_sk_err(net, RTNLGRP_LINK, err); + if (err) + rtnl_set_sk_err(net, RTNLGRP_LINK, err); return err; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b50861b..c373c07 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1506,23 +1506,8 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset, /* * Generic function to send a packet as reply to another packet. * Used to send some TCP resets/acks so far. - * - * Use a fake percpu inet socket to avoid false sharing and contention. */ -static DEFINE_PER_CPU(struct inet_sock, unicast_sock) = { - .sk = { - .__sk_common = { - .skc_refcnt = ATOMIC_INIT(1), - }, - .sk_wmem_alloc = ATOMIC_INIT(1), - .sk_allocation = GFP_ATOMIC, - .sk_flags = (1UL << SOCK_USE_WRITE_QUEUE), - }, - .pmtudisc = IP_PMTUDISC_WANT, - .uc_ttl = -1, -}; - -void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, +void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, @@ -1532,9 +1517,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, struct ipcm_cookie ipc; struct flowi4 fl4; struct rtable *rt = skb_rtable(skb); + struct net *net = sock_net(sk); struct sk_buff *nskb; - struct sock *sk; - struct inet_sock *inet; int err; if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) @@ -1565,15 +1549,11 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet = &get_cpu_var(unicast_sock); + inet_sk(sk)->tos = arg->tos; - inet->tos = arg->tos; - sk = &inet->sk; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; - sock_net_set(sk, net); - __skb_queue_head_init(&sk->sk_write_queue); sk->sk_sndbuf = sysctl_wmem_default; err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, &ipc, &rt, MSG_DONTWAIT); @@ -1589,13 +1569,10 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - skb_orphan(nskb); skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } out: - put_cpu_var(unicast_sock); - ip_rt_put(rt); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d58dd0e..52e1f2b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -966,6 +966,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (dst->dev->mtu < mtu) return; + if (rt->rt_pmtu && rt->rt_pmtu < mtu) + return; + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index bb395d4..c037644 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -150,7 +150,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) tcp_slow_start(tp, acked); else { bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt); + tcp_cong_avoid_ai(tp, ca->cnt, 1); } } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 27ead0d..8670e68 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -291,26 +291,32 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and * returns the leftover acks to adjust cwnd in congestion avoidance mode. */ -void tcp_slow_start(struct tcp_sock *tp, u32 acked) +u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { u32 cwnd = tp->snd_cwnd + acked; if (cwnd > tp->snd_ssthresh) cwnd = tp->snd_ssthresh + 1; + acked -= cwnd - tp->snd_cwnd; tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + + return acked; } EXPORT_SYMBOL_GPL(tcp_slow_start); -/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ -void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) +/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), + * for every packet that was ACKed. + */ +void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { + tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else { - tp->snd_cwnd_cnt++; + u32 delta = tp->snd_cwnd_cnt / w; + + tp->snd_cwnd_cnt -= delta * w; + tp->snd_cwnd += delta; } + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); } EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); @@ -329,11 +335,13 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; /* In "safe" area, increase. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) - tcp_slow_start(tp, acked); + if (tp->snd_cwnd <= tp->snd_ssthresh) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } /* In dangerous area, increase slowly. */ - else - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 6b60024..4b276d1 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -93,9 +93,7 @@ struct bictcp { u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ -#define ACK_RATIO_SHIFT 4 -#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) - u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ + u16 unused; u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ @@ -114,7 +112,6 @@ static inline void bictcp_reset(struct bictcp *ca) ca->bic_K = 0; ca->delay_min = 0; ca->epoch_start = 0; - ca->delayed_ack = 2 << ACK_RATIO_SHIFT; ca->ack_cnt = 0; ca->tcp_cwnd = 0; ca->found = 0; @@ -205,23 +202,30 @@ static u32 cubic_root(u64 a) /* * Compute congestion window to use. */ -static inline void bictcp_update(struct bictcp *ca, u32 cwnd) +static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) { u32 delta, bic_target, max_cnt; u64 offs, t; - ca->ack_cnt++; /* count the number of ACKs */ + ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && (s32)(tcp_time_stamp - ca->last_time) <= HZ / 32) return; + /* The CUBIC function can update ca->cnt at most once per jiffy. + * On all cwnd reduction events, ca->epoch_start is set to 0, + * which will force a recalculation of ca->cnt. + */ + if (ca->epoch_start && tcp_time_stamp == ca->last_time) + goto tcp_friendliness; + ca->last_cwnd = cwnd; ca->last_time = tcp_time_stamp; if (ca->epoch_start == 0) { ca->epoch_start = tcp_time_stamp; /* record beginning */ - ca->ack_cnt = 1; /* start counting */ + ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ if (ca->last_max_cwnd <= cwnd) { @@ -283,6 +287,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ +tcp_friendliness: /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; @@ -301,7 +306,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } } - ca->cnt = (ca->cnt << ACK_RATIO_SHIFT) / ca->delayed_ack; if (ca->cnt == 0) /* cannot be zero */ ca->cnt = 1; } @@ -317,11 +321,12 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (tp->snd_cwnd <= tp->snd_ssthresh) { if (hystart && after(ack, ca->end_seq)) bictcp_hystart_reset(sk); - tcp_slow_start(tp, acked); - } else { - bictcp_update(ca, tp->snd_cwnd); - tcp_cong_avoid_ai(tp, ca->cnt); + acked = tcp_slow_start(tp, acked); + if (!acked) + return; } + bictcp_update(ca, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, ca->cnt, acked); } static u32 bictcp_recalc_ssthresh(struct sock *sk) @@ -411,20 +416,10 @@ static void hystart_update(struct sock *sk, u32 delay) */ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) { - const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; - if (icsk->icsk_ca_state == TCP_CA_Open) { - u32 ratio = ca->delayed_ack; - - ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; - ratio += cnt; - - ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT); - } - /* Some calls are for duplicates without timetamps */ if (rtt_us < 0) return; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7..d22f544 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -683,7 +683,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.bound_dev_if = sk->sk_bound_dev_if; arg.tos = ip_hdr(skb)->tos; - ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); @@ -767,7 +768,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, if (oif) arg.bound_dev_if = oif; arg.tos = tos; - ip_send_unicast_reply(net, skb, &TCP_SKB_CB(skb)->header.h4.opt, + ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), + skb, &TCP_SKB_CB(skb)->header.h4.opt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len); @@ -2428,14 +2430,39 @@ struct proto tcp_prot = { }; EXPORT_SYMBOL(tcp_prot); +static void __net_exit tcp_sk_exit(struct net *net) +{ + int cpu; + + for_each_possible_cpu(cpu) + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); + free_percpu(net->ipv4.tcp_sk); +} + static int __net_init tcp_sk_init(struct net *net) { + int res, cpu; + + net->ipv4.tcp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.tcp_sk) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, net); + if (res) + goto fail; + *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; + } net->ipv4.sysctl_tcp_ecn = 2; return 0; -} -static void __net_exit tcp_sk_exit(struct net *net) -{ +fail: + tcp_sk_exit(net); + + return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 6824afb..333bcb2 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -25,7 +25,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp, acked); else - tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), + 1); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index a4d2d2d..112151e 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -159,7 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) /* In the "non-congestive state", increase cwnd * every rtt. */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); } else { /* In the "congestive state", increase cwnd * every other rtt. diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index cd72732..17d3566 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -92,7 +92,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked) } else { /* Reno */ - tcp_cong_avoid_ai(tp, tp->snd_cwnd); + tcp_cong_avoid_ai(tp, tp->snd_cwnd, 1); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 13cda4c..01ccc28 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -417,7 +417,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (code == ICMPV6_HDR_FIELD) teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); - if (teli && teli == info - 2) { + if (teli && teli == be32_to_cpu(info) - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; if (tel->encap_limit == 0) { net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", @@ -429,7 +429,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: - mtu = info - offset; + mtu = be32_to_cpu(info) - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ce69a12..d28f2a2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -537,20 +537,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static u32 ip6_idents_hashrnd __read_mostly; - u32 hash, id; - - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); - hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); - - id = ip_idents_reserve(hash, 1); - fhdr->identification = htonl(id); -} - int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 97f41a3..54520a0 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,6 +9,24 @@ #include <net/addrconf.h> #include <net/secure_seq.h> +u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, struct in6_addr *src) +{ + u32 hash, id; + + hash = __ipv6_addr_jhash(dst, hashrnd); + hash = __ipv6_addr_jhash(src, hash); + + /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, + * set the hight order instead thus minimizing possible future + * collisions. + */ + id = ip_idents_reserve(hash, 1); + if (unlikely(!id)) + id = 1 << 31; + + return id; +} + /* This function exists only for tap drivers that must support broken * clients requesting UFO without specifying an IPv6 fragment ID. * @@ -22,7 +40,7 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; - u32 hash, id; + u32 id; addrs = skb_header_pointer(skb, skb_network_offset(skb) + @@ -34,14 +52,25 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) net_get_random_once(&ip6_proxy_idents_hashrnd, sizeof(ip6_proxy_idents_hashrnd)); - hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); - hash = __ipv6_addr_jhash(&addrs[0], hash); - - id = ip_idents_reserve(hash, 1); - skb_shinfo(skb)->ip6_frag_id = htonl(id); + id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, + &addrs[1], &addrs[0]); + skb_shinfo(skb)->ip6_frag_id = id; } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); +void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr, + &rt->rt6i_src.addr); + fhdr->identification = htonl(id); +} +EXPORT_SYMBOL(ipv6_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 213546b..cdbfe5a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1506,12 +1506,12 @@ static bool ipip6_netlink_encap_parms(struct nlattr *data[], if (data[IFLA_IPTUN_ENCAP_SPORT]) { ret = true; - ipencap->sport = nla_get_u16(data[IFLA_IPTUN_ENCAP_SPORT]); + ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); } if (data[IFLA_IPTUN_ENCAP_DPORT]) { ret = true; - ipencap->dport = nla_get_u16(data[IFLA_IPTUN_ENCAP_DPORT]); + ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); } return ret; @@ -1707,9 +1707,9 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_SPORT, + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b6aa8ed..a562769 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -52,6 +52,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + /* Set the IPv6 fragment id if not set yet */ + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(skb); + segs = NULL; goto out; } @@ -108,7 +112,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - fptr->identification = skb_shinfo(skb)->ip6_frag_id; + if (skb_shinfo(skb)->ip6_frag_id) + fptr->identification = skb_shinfo(skb)->ip6_frag_id; + else + ipv6_select_ident(fptr, + (struct rt6_info *)skb_dst(skb)); /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 990decb..b87ca32 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -659,16 +659,24 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -static int ip_vs_route_me_harder(int af, struct sk_buff *skb) +static int ip_vs_route_me_harder(int af, struct sk_buff *skb, + unsigned int hooknum) { + if (!sysctl_snat_reroute(skb)) + return 0; + /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ + if (NF_INET_LOCAL_IN == hooknum) + return 0; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0) + struct dst_entry *dst = skb_dst(skb); + + if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && + ip6_route_me_harder(skb) != 0) return 1; } else #endif - if ((sysctl_snat_reroute(skb) || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) return 1; @@ -791,7 +799,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, __u8 protocol, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, - unsigned int offset, unsigned int ihl) + unsigned int offset, unsigned int ihl, + unsigned int hooknum) { unsigned int verdict = NF_DROP; @@ -821,7 +830,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto out; /* do the statistics and put it back */ @@ -916,7 +925,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, - pp, ciph.len, ihl); + pp, ciph.len, ihl, hooknum); } #ifdef CONFIG_IP_VS_IPV6 @@ -981,7 +990,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, snet.in6 = ciph.saddr.in6; writable = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr)); + pp, writable, sizeof(struct ipv6hdr), + hooknum); } #endif @@ -1040,7 +1050,8 @@ static inline bool is_new_conn(const struct sk_buff *skb, */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph, + unsigned int hooknum) { struct ip_vs_protocol *pp = pd->pp; @@ -1078,7 +1089,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto drop; IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); @@ -1181,7 +1192,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) cp = pp->conn_out_get(af, skb, &iph, 0); if (likely(cp)) - return handle_response(af, skb, pd, cp, &iph); + return handle_response(af, skb, pd, cp, &iph, hooknum); if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3b3ddb4..1ff04bc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1134,9 +1134,11 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) /* Restore old counters on this cpu, no problem. Per-cpu statistics * are not exposed to userspace. */ + preempt_disable(); stats = this_cpu_ptr(newstats); stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + preempt_enable(); return newstats; } @@ -1262,8 +1264,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, sizeof(struct nft_trans_chain)); - if (trans == NULL) + if (trans == NULL) { + free_percpu(stats); return -ENOMEM; + } nft_trans_chain_stats(trans) = stats; nft_trans_chain_update(trans) = true; @@ -1319,8 +1323,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, hookfn = type->hooks[hooknum]; basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); - if (basechain == NULL) + if (basechain == NULL) { + module_put(type->owner); return -ENOMEM; + } if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); @@ -3753,6 +3759,24 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, } EXPORT_SYMBOL_GPL(nft_chain_validate_dependency); +int nft_chain_validate_hooks(const struct nft_chain *chain, + unsigned int hook_flags) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + if ((1 << basechain->ops[0].hooknum) & hook_flags) + return 0; + + return -EOPNOTSUPP; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nft_chain_validate_hooks); + /* * Loop detection - walk through the ruleset beginning at the destination chain * of a new jump until either the source chain is reached (loop) or all diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index d1ffd5e..9aea747 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -21,6 +21,21 @@ const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { }; EXPORT_SYMBOL_GPL(nft_masq_policy); +int nft_masq_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_POST_ROUTING)); +} +EXPORT_SYMBOL_GPL(nft_masq_validate); + int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -28,8 +43,8 @@ int nft_masq_init(const struct nft_ctx *ctx, struct nft_masq *priv = nft_expr_priv(expr); int err; - err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); - if (err < 0) + err = nft_masq_validate(ctx, expr, NULL); + if (err) return err; if (tb[NFTA_MASQ_FLAGS] == NULL) @@ -60,12 +75,5 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_masq_dump); -int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} -EXPORT_SYMBOL_GPL(nft_masq_validate); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index aff54fb1..a0837c6 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -88,17 +88,40 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { [NFTA_NAT_FLAGS] = { .type = NLA_U32 }, }; -static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_nat_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) { struct nft_nat *priv = nft_expr_priv(expr); - u32 family; int err; err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); if (err < 0) return err; + switch (priv->type) { + case NFT_NAT_SNAT: + err = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN)); + break; + case NFT_NAT_DNAT: + err = nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT)); + break; + } + + return err; +} + +static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_nat *priv = nft_expr_priv(expr); + u32 family; + int err; + if (tb[NFTA_NAT_TYPE] == NULL || (tb[NFTA_NAT_REG_ADDR_MIN] == NULL && tb[NFTA_NAT_REG_PROTO_MIN] == NULL)) @@ -115,6 +138,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } + err = nft_nat_validate(ctx, expr, NULL); + if (err < 0) + return err; + if (tb[NFTA_NAT_FAMILY] == NULL) return -EINVAL; @@ -219,13 +246,6 @@ nla_put_failure: return -1; } -static int nft_nat_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} - static struct nft_expr_type nft_nat_type; static const struct nft_expr_ops nft_nat_ops = { .type = &nft_nat_type, diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 9e8093f..d7e9e93 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -23,6 +23,22 @@ const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { }; EXPORT_SYMBOL_GPL(nft_redir_policy); +int nft_redir_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + return nft_chain_validate_hooks(ctx->chain, + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT)); +} +EXPORT_SYMBOL_GPL(nft_redir_validate); + int nft_redir_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -30,7 +46,7 @@ int nft_redir_init(const struct nft_ctx *ctx, struct nft_redir *priv = nft_expr_priv(expr); int err; - err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + err = nft_redir_validate(ctx, expr, NULL); if (err < 0) return err; @@ -88,12 +104,5 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_redir_dump); -int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nft_data **data) -{ - return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); -} -EXPORT_SYMBOL_GPL(nft_redir_validate); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>"); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 02fdde2..75532ef 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1438,7 +1438,7 @@ static void netlink_undo_bind(int group, long unsigned int groups, for (undo = 0; undo < group; undo++) if (test_bit(undo, &groups)) - nlk->netlink_unbind(sock_net(sk), undo); + nlk->netlink_unbind(sock_net(sk), undo + 1); } static int netlink_bind(struct socket *sock, struct sockaddr *addr, @@ -1476,7 +1476,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, for (group = 0; group < nlk->ngroups; group++) { if (!test_bit(group, &groups)) continue; - err = nlk->netlink_bind(net, group); + err = nlk->netlink_bind(net, group + 1); if (!err) continue; netlink_undo_bind(group, groups, sk); diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c3b0cd43..c173f69 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -71,14 +71,14 @@ static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "max_unacked_packets", .data = &rds_sysctl_max_unacked_packets, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_unacked_bytes", .data = &rds_sysctl_max_unacked_bytes, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index aad6a67..baef987 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -556,8 +556,9 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, } EXPORT_SYMBOL(tcf_exts_change); -#define tcf_exts_first_act(ext) \ - list_first_entry(&(exts)->actions, struct tc_action, list) +#define tcf_exts_first_act(ext) \ + list_first_entry_or_null(&(exts)->actions, \ + struct tc_action, list) int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { @@ -603,7 +604,7 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct tc_action *a = tcf_exts_first_act(exts); - if (tcf_action_copy_stats(skb, a, 1) < 0) + if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0) return -1; #endif return 0; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 9b05924..333cd94 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -670,8 +670,14 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_PLIMIT]) q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]); - if (tb[TCA_FQ_QUANTUM]) - q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); + if (tb[TCA_FQ_QUANTUM]) { + u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); + + if (quantum > 0) + q->quantum = quantum; + else + err = -EINVAL; + } if (tb[TCA_FQ_INITIAL_QUANTUM]) q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e49e231..06320c8 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2608,7 +2608,7 @@ do_addr_param: addr_param = param.v + sizeof(sctp_addip_param_t); - af = sctp_get_af_specific(param_type2af(param.p->type)); + af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (af == NULL) break; |