diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-16 11:28:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-16 11:28:59 -0700 |
commit | 48bddb143befb1dd93c0e5a66af62cfd60c86b04 (patch) | |
tree | 8d71225922209f780d0e0a1f27beaac3e836972b | |
parent | c8503720fd0b952ff25bcc49b6eb9c492e22f3c6 (diff) | |
parent | 8e29f97979c300406c21994986bdfcdb67fe4ff7 (diff) | |
download | op-kernel-dev-48bddb143befb1dd93c0e5a66af62cfd60c86b04.zip op-kernel-dev-48bddb143befb1dd93c0e5a66af62cfd60c86b04.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
1) Fix hotplug deadlock in hv_netvsc, from Stephen Hemminger.
2) Fix double-free in rmnet driver, from Dan Carpenter.
3) INET connection socket layer can double put request sockets, fix
from Eric Dumazet.
4) Don't match collect metadata-mode tunnels if the device is down,
from Haishuang Yan.
5) Do not perform TSO6/GSO on ipv6 packets with extensions headers in
be2net driver, from Suresh Reddy.
6) Fix scaling error in gen_estimator, from Eric Dumazet.
7) Fix 64-bit statistics deadlock in systemport driver, from Florian
Fainelli.
8) Fix use-after-free in sctp_sock_dump, from Xin Long.
9) Reject invalid BPF_END instructions in verifier, from Edward Cree.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (43 commits)
mlxsw: spectrum_router: Only handle IPv4 and IPv6 events
Documentation: link in networking docs
tcp: fix data delivery rate
bpf/verifier: reject BPF_ALU64|BPF_END
sctp: do not mark sk dumped when inet_sctp_diag_fill returns err
sctp: fix an use-after-free issue in sctp_sock_dump
netvsc: increase default receive buffer size
tcp: update skb->skb_mstamp more carefully
net: ipv4: fix l3slave check for index returned in IP_PKTINFO
net: smsc911x: Quieten netif during suspend
net: systemport: Fix 64-bit stats deadlock
net: vrf: avoid gcc-4.6 warning
qed: remove unnecessary call to memset
tg3: clean up redundant initialization of tnapi
tls: make tls_sw_free_resources static
sctp: potential read out of bounds in sctp_ulpevent_type_enabled()
MAINTAINERS: review Renesas DT bindings as well
net_sched: gen_estimator: fix scaling error in bytes/packets samples
nfp: wait for the NSP resource to appear on boot
nfp: wait for board state before talking to the NSP
...
48 files changed, 467 insertions, 234 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index e5e33ba..789b74d 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -45,7 +45,7 @@ in many more places. There's xt_bpf for netfilter, cls_bpf in the kernel qdisc layer, SECCOMP-BPF (SECure COMPuting [1]), and lots of other places such as team driver, PTP code, etc where BPF is being used. - [1] Documentation/prctl/seccomp_filter.txt + [1] Documentation/userspace-api/seccomp_filter.rst Original BPF paper: diff --git a/MAINTAINERS b/MAINTAINERS index ef65785..2281af4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11435,6 +11435,8 @@ RENESAS ETHERNET DRIVERS R: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com> L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org +F: Documentation/devicetree/bindings/net/renesas,*.txt +F: Documentation/devicetree/bindings/net/sh_eth.txt F: drivers/net/ethernet/renesas/ F: include/linux/sh_eth.h diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index fc63992..c99dc59 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4289,7 +4289,7 @@ static int bond_check_params(struct bond_params *params) int bond_mode = BOND_MODE_ROUNDROBIN; int xmit_hashtype = BOND_XMIT_POLICY_LAYER2; int lacp_fast = 0; - int tlb_dynamic_lb = 0; + int tlb_dynamic_lb; /* Convert string parameters. */ if (mode) { @@ -4601,16 +4601,13 @@ static int bond_check_params(struct bond_params *params) } ad_user_port_key = valptr->value; - if ((bond_mode == BOND_MODE_TLB) || (bond_mode == BOND_MODE_ALB)) { - bond_opt_initstr(&newval, "default"); - valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), - &newval); - if (!valptr) { - pr_err("Error: No tlb_dynamic_lb default value"); - return -EINVAL; - } - tlb_dynamic_lb = valptr->value; + bond_opt_initstr(&newval, "default"); + valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), &newval); + if (!valptr) { + pr_err("Error: No tlb_dynamic_lb default value"); + return -EINVAL; } + tlb_dynamic_lb = valptr->value; if (lp_interval == 0) { pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n", diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index a12d603..5931aa2 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -754,6 +754,9 @@ static int bond_option_mode_set(struct bonding *bond, bond->params.miimon); } + if (newval->value == BOND_MODE_ALB) + bond->params.tlb_dynamic_lb = 1; + /* don't cache arp_validate between modes */ bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; bond->params.mode = newval->value; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index a6572b5..c3c53f6 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1735,11 +1735,8 @@ static void bcm_sysport_get_stats64(struct net_device *dev, stats->tx_packets += tx_packets; } - /* lockless update tx_bytes and tx_packets */ - u64_stats_update_begin(&priv->syncp); stats64->tx_bytes = stats->tx_bytes; stats64->tx_packets = stats->tx_packets; - u64_stats_update_end(&priv->syncp); do { start = u64_stats_fetch_begin_irq(&priv->syncp); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index af33dc1..656e6af 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11536,11 +11536,11 @@ static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq, tg3_napi_enable(tp); for (i = 0; i < tp->irq_cnt; i++) { - struct tg3_napi *tnapi = &tp->napi[i]; err = tg3_request_irq(tp, i); if (err) { for (i--; i >= 0; i--) { - tnapi = &tp->napi[i]; + struct tg3_napi *tnapi = &tp->napi[i]; + free_irq(tnapi->irq_vec, tnapi); } goto out_napi_fini; diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 674cf9d..8984c49 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -930,6 +930,14 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb) return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; } +static inline bool is_ipv6_ext_hdr(struct sk_buff *skb) +{ + if (ip_hdr(skb)->version == 6) + return ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr); + else + return false; +} + #define be_error_recovering(adapter) \ (adapter->flags & BE_FLAGS_TRY_RECOVERY) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 319eee3..0e3d9f39 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5089,6 +5089,20 @@ static netdev_features_t be_features_check(struct sk_buff *skb, struct be_adapter *adapter = netdev_priv(dev); u8 l4_hdr = 0; + if (skb_is_gso(skb)) { + /* IPv6 TSO requests with extension hdrs are a problem + * to Lancer and BE3 HW. Disable TSO6 feature. + */ + if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb)) + features &= ~NETIF_F_TSO6; + + /* Lancer cannot handle the packet with MSS less than 256. + * Disable the GSO support in such cases + */ + if (lancer_chip(adapter) && skb_shinfo(skb)->gso_size < 256) + features &= ~NETIF_F_GSO_MASK; + } + /* The code below restricts offload features for some tunneled and * Q-in-Q packets. * Offload features for normal (non tunnel) packets are unchanged. diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ed7cd6c..696b99e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -575,15 +575,14 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) { - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; int i; for (i = 0; i < mlxsw_sp->span.entries_count; i++) { struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - if (curr->used && curr->local_port == port->local_port) + if (curr->used && curr->local_port == local_port) return curr; } return NULL; @@ -594,7 +593,8 @@ static struct mlxsw_sp_span_entry { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(port); + span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, + port->local_port); if (span_entry) { /* Already exists, just take a reference */ span_entry->ref_count++; @@ -783,12 +783,13 @@ err_port_bind: } static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, + u8 destination_port, enum mlxsw_sp_span_type type) { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(to); + span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, + destination_port); if (!span_entry) { netdev_err(from->dev, "no span entry found\n"); return; @@ -1563,14 +1564,12 @@ static void mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_port_mall_mirror_tc_entry *mirror) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; enum mlxsw_sp_span_type span_type; - struct mlxsw_sp_port *to_port; - to_port = mlxsw_sp->ports[mirror->to_local_port]; span_type = mirror->ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type); + mlxsw_sp_span_mirror_remove(mlxsw_sp_port, mirror->to_local_port, + span_type); } static int @@ -2545,7 +2544,9 @@ out: return err; } -#define MLXSW_SP_QSFP_I2C_ADDR 0x50 +#define MLXSW_SP_I2C_ADDR_LOW 0x50 +#define MLXSW_SP_I2C_ADDR_HIGH 0x51 +#define MLXSW_SP_EEPROM_PAGE_LENGTH 256 static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, u16 offset, u16 size, void *data, @@ -2554,12 +2555,25 @@ static int mlxsw_sp_query_module_eeprom(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char eeprom_tmp[MLXSW_SP_REG_MCIA_EEPROM_SIZE]; char mcia_pl[MLXSW_REG_MCIA_LEN]; + u16 i2c_addr; int status; int err; size = min_t(u16, size, MLXSW_SP_REG_MCIA_EEPROM_SIZE); + + if (offset < MLXSW_SP_EEPROM_PAGE_LENGTH && + offset + size > MLXSW_SP_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size = MLXSW_SP_EEPROM_PAGE_LENGTH - offset; + + i2c_addr = MLXSW_SP_I2C_ADDR_LOW; + if (offset >= MLXSW_SP_EEPROM_PAGE_LENGTH) { + i2c_addr = MLXSW_SP_I2C_ADDR_HIGH; + offset -= MLXSW_SP_EEPROM_PAGE_LENGTH; + } + mlxsw_reg_mcia_pack(mcia_pl, mlxsw_sp_port->mapping.module, - 0, 0, offset, size, MLXSW_SP_QSFP_I2C_ADDR); + 0, 0, offset, size, i2c_addr); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mcia), mcia_pl); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f0fb898..2cfb3f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4868,7 +4868,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; - if (!net_eq(info->net, &init_net)) + if (!net_eq(info->net, &init_net) || + (info->family != AF_INET && info->family != AF_INET6)) return NOTIFY_DONE; fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index d396183..a18b4d2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -44,6 +44,16 @@ #include "../nfp_net.h" #include "../nfp_port.h" +#define NFP_FLOWER_WHITELIST_DISSECTOR \ + (BIT(FLOW_DISSECTOR_KEY_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_BASIC) | \ + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_PORTS) | \ + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_VLAN) | \ + BIT(FLOW_DISSECTOR_KEY_IP)) + static int nfp_flower_xmit_flow(struct net_device *netdev, struct nfp_fl_payload *nfp_flow, u8 mtype) @@ -112,6 +122,9 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, u8 key_layer; int key_size; + if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) + return -EOPNOTSUPP; + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { struct flow_dissector_key_control *mask_enc_ctl = diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index f055b17..f8fa63b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -74,6 +74,45 @@ static const struct pci_device_id nfp_pci_device_ids[] = { }; MODULE_DEVICE_TABLE(pci, nfp_pci_device_ids); +static bool nfp_board_ready(struct nfp_pf *pf) +{ + const char *cp; + long state; + int err; + + cp = nfp_hwinfo_lookup(pf->hwinfo, "board.state"); + if (!cp) + return false; + + err = kstrtol(cp, 0, &state); + if (err < 0) + return false; + + return state == 15; +} + +static int nfp_pf_board_state_wait(struct nfp_pf *pf) +{ + const unsigned long wait_until = jiffies + 10 * HZ; + + while (!nfp_board_ready(pf)) { + if (time_is_before_eq_jiffies(wait_until)) { + nfp_err(pf->cpp, "NFP board initialization timeout\n"); + return -EINVAL; + } + + nfp_info(pf->cpp, "waiting for board initialization\n"); + if (msleep_interruptible(500)) + return -ERESTARTSYS; + + /* Refresh cached information */ + kfree(pf->hwinfo); + pf->hwinfo = nfp_hwinfo_read(pf->cpp); + } + + return 0; +} + static int nfp_pcie_sriov_read_nfd_limit(struct nfp_pf *pf) { int err; @@ -312,6 +351,10 @@ static int nfp_nsp_init(struct pci_dev *pdev, struct nfp_pf *pf) struct nfp_nsp *nsp; int err; + err = nfp_resource_wait(pf->cpp, NFP_RESOURCE_NSP, 30); + if (err) + return err; + nsp = nfp_nsp_open(pf->cpp); if (IS_ERR(nsp)) { err = PTR_ERR(nsp); @@ -425,6 +468,10 @@ static int nfp_pci_probe(struct pci_dev *pdev, nfp_hwinfo_lookup(pf->hwinfo, "assembly.revision"), nfp_hwinfo_lookup(pf->hwinfo, "cpld.version")); + err = nfp_pf_board_state_wait(pf); + if (err) + goto err_hwinfo_free; + err = devlink_register(devlink, &pdev->dev); if (err) goto err_hwinfo_free; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 5abb9ba..ff373ac 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -64,23 +64,6 @@ #define NFP_PF_CSR_SLICE_SIZE (32 * 1024) -static int nfp_is_ready(struct nfp_pf *pf) -{ - const char *cp; - long state; - int err; - - cp = nfp_hwinfo_lookup(pf->hwinfo, "board.state"); - if (!cp) - return 0; - - err = kstrtol(cp, 0, &state); - if (err < 0) - return 0; - - return state == 15; -} - /** * nfp_net_get_mac_addr() - Get the MAC address. * @pf: NFP PF handle @@ -725,12 +708,6 @@ int nfp_net_pci_probe(struct nfp_pf *pf) INIT_WORK(&pf->port_refresh_work, nfp_net_refresh_vnics); - /* Verify that the board has completed initialization */ - if (!nfp_is_ready(pf)) { - nfp_err(pf->cpp, "NFP is not ready for NIC operation.\n"); - return -EINVAL; - } - if (!pf->rtbl) { nfp_err(pf->cpp, "No %s, giving up.\n", pf->fw_loaded ? "symbol table" : "firmware found"); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h index 1a8d04a..3ce51f0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp.h @@ -97,6 +97,8 @@ nfp_resource_acquire(struct nfp_cpp *cpp, const char *name); void nfp_resource_release(struct nfp_resource *res); +int nfp_resource_wait(struct nfp_cpp *cpp, const char *name, unsigned int secs); + u32 nfp_resource_cpp_id(struct nfp_resource *res); const char *nfp_resource_name(struct nfp_resource *res); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c index 0726122..b1dd13f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_resource.c @@ -250,6 +250,51 @@ void nfp_resource_release(struct nfp_resource *res) } /** + * nfp_resource_wait() - Wait for resource to appear + * @cpp: NFP CPP handle + * @name: Name of the resource + * @secs: Number of seconds to wait + * + * Wait for resource to appear in the resource table, grab and release + * its lock. The wait is jiffies-based, don't expect fine granularity. + * + * Return: 0 on success, errno otherwise. + */ +int nfp_resource_wait(struct nfp_cpp *cpp, const char *name, unsigned int secs) +{ + unsigned long warn_at = jiffies + NFP_MUTEX_WAIT_FIRST_WARN * HZ; + unsigned long err_at = jiffies + secs * HZ; + struct nfp_resource *res; + + while (true) { + res = nfp_resource_acquire(cpp, name); + if (!IS_ERR(res)) { + nfp_resource_release(res); + return 0; + } + + if (PTR_ERR(res) != -ENOENT) { + nfp_err(cpp, "error waiting for resource %s: %ld\n", + name, PTR_ERR(res)); + return PTR_ERR(res); + } + if (time_is_before_eq_jiffies(err_at)) { + nfp_err(cpp, "timeout waiting for resource %s\n", name); + return -ETIMEDOUT; + } + if (time_is_before_eq_jiffies(warn_at)) { + warn_at = jiffies + NFP_MUTEX_WAIT_NEXT_WARN * HZ; + nfp_info(cpp, "waiting for NFP resource %s\n", name); + } + if (msleep_interruptible(10)) { + nfp_err(cpp, "wait for resource %s interrupted\n", + name); + return -ERESTARTSYS; + } + } +} + +/** * nfp_resource_cpp_id() - Return the cpp_id of a resource handle * @res: NFP Resource handle * diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 89ab786..4a67c55 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/interrupt.h> #include <linux/mii.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index eaca457..8f6ccc0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1244,7 +1244,6 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, if (!dcbx_info) return -ENOMEM; - memset(dcbx_info, 0, sizeof(*dcbx_info)); rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB); if (rc) { kfree(dcbx_info); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c index 557c9bf..86b8c75 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c @@ -84,6 +84,10 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb) if (((int)skb->len - (int)packet_len) < 0) return NULL; + /* Some hardware can send us empty frames. Catch them */ + if (ntohs(maph->pkt_len) == 0) + return NULL; + skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC); if (!skbn) return NULL; @@ -94,11 +98,5 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb) memcpy(skbn->data, skb->data, packet_len); skb_pull(skb, packet_len); - /* Some hardware can send us empty frames. Catch them */ - if (ntohs(maph->pkt_len) == 0) { - kfree_skb(skb); - return NULL; - } - return skbn; } diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 0b6a39b..012fb66 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2595,6 +2595,11 @@ static int smsc911x_suspend(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct smsc911x_data *pdata = netdev_priv(ndev); + if (netif_running(ndev)) { + netif_stop_queue(ndev); + netif_device_detach(ndev); + } + /* enable wake on LAN, energy detection and the external PME * signal. */ smsc911x_reg_write(pdata, PMT_CTRL, @@ -2628,7 +2633,15 @@ static int smsc911x_resume(struct device *dev) while (!(smsc911x_reg_read(pdata, PMT_CTRL) & PMT_CTRL_READY_) && --to) udelay(1000); - return (to == 0) ? -EIO : 0; + if (to == 0) + return -EIO; + + if (netif_running(ndev)) { + netif_device_attach(ndev); + netif_start_queue(ndev); + } + + return 0; } static const struct dev_pm_ops smsc911x_pm_ops = { diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index ec546da..d98cdfb 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -204,6 +204,8 @@ int netvsc_recv_callback(struct net_device *net, const struct ndis_pkt_8021q_info *vlan); void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); + +void rndis_set_subchannel(struct work_struct *w); bool rndis_filter_opened(const struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); @@ -782,6 +784,7 @@ struct netvsc_device { u32 num_chn; atomic_t open_chn; + struct work_struct subchan_work; wait_queue_head_t subchan_open; struct rndis_device *extension; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0062b80..a5511b7 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -81,6 +81,7 @@ static struct netvsc_device *alloc_net_device(void) init_completion(&net_device->channel_init_wait); init_waitqueue_head(&net_device->subchan_open); + INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); return net_device; } @@ -557,6 +558,8 @@ void netvsc_device_remove(struct hv_device *device) = rtnl_dereference(net_device_ctx->nvdev); int i; + cancel_work_sync(&net_device->subchan_work); + netvsc_disconnect_vsp(device); RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 165ba4b..d4902ee5f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -49,7 +49,7 @@ #define NETVSC_MIN_TX_SECTIONS 10 #define NETVSC_DEFAULT_TX 192 /* ~1M */ #define NETVSC_MIN_RX_SECTIONS 10 /* ~64K */ -#define NETVSC_DEFAULT_RX 2048 /* ~4M */ +#define NETVSC_DEFAULT_RX 10485 /* Max ~16M */ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) @@ -853,10 +853,7 @@ static int netvsc_set_channels(struct net_device *net, rndis_filter_device_remove(dev, nvdev); nvdev = rndis_filter_device_add(dev, &device_info); - if (!IS_ERR(nvdev)) { - netif_set_real_num_tx_queues(net, nvdev->num_chn); - netif_set_real_num_rx_queues(net, nvdev->num_chn); - } else { + if (IS_ERR(nvdev)) { ret = PTR_ERR(nvdev); device_info.num_chn = orig; nvdev = rndis_filter_device_add(dev, &device_info); @@ -1954,9 +1951,6 @@ static int netvsc_probe(struct hv_device *dev, NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; net->vlan_features = net->features; - netif_set_real_num_tx_queues(net, nvdev->num_chn); - netif_set_real_num_rx_queues(net, nvdev->num_chn); - netdev_lockdep_set_classes(net); /* MTU range: 68 - 1500 or 65521 */ @@ -2012,9 +2006,10 @@ static int netvsc_remove(struct hv_device *dev) if (vf_netdev) netvsc_unregister_vf(vf_netdev); + unregister_netdevice(net); + rndis_filter_device_remove(dev, rtnl_dereference(ndev_ctx->nvdev)); - unregister_netdevice(net); rtnl_unlock(); hv_set_drvdata(dev, NULL); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 69c40b8..065b204 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1039,8 +1039,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) /* Set the channel before opening.*/ nvchan->channel = new_sc; - netif_napi_add(ndev, &nvchan->napi, - netvsc_poll, NAPI_POLL_WEIGHT); ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, nvscdev->ring_size * PAGE_SIZE, NULL, 0, @@ -1048,10 +1046,86 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) if (ret == 0) napi_enable(&nvchan->napi); else - netif_napi_del(&nvchan->napi); + netdev_notice(ndev, "sub channel open failed: %d\n", ret); - atomic_inc(&nvscdev->open_chn); - wake_up(&nvscdev->subchan_open); + if (atomic_inc_return(&nvscdev->open_chn) == nvscdev->num_chn) + wake_up(&nvscdev->subchan_open); +} + +/* Open sub-channels after completing the handling of the device probe. + * This breaks overlap of processing the host message for the + * new primary channel with the initialization of sub-channels. + */ +void rndis_set_subchannel(struct work_struct *w) +{ + struct netvsc_device *nvdev + = container_of(w, struct netvsc_device, subchan_work); + struct nvsp_message *init_packet = &nvdev->channel_init_pkt; + struct net_device_context *ndev_ctx; + struct rndis_device *rdev; + struct net_device *ndev; + struct hv_device *hv_dev; + int i, ret; + + if (!rtnl_trylock()) { + schedule_work(w); + return; + } + + rdev = nvdev->extension; + if (!rdev) + goto unlock; /* device was removed */ + + ndev = rdev->ndev; + ndev_ctx = netdev_priv(ndev); + hv_dev = ndev_ctx->device_ctx; + + memset(init_packet, 0, sizeof(struct nvsp_message)); + init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; + init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE; + init_packet->msg.v5_msg.subchn_req.num_subchannels = + nvdev->num_chn - 1; + ret = vmbus_sendpacket(hv_dev->channel, init_packet, + sizeof(struct nvsp_message), + (unsigned long)init_packet, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret) { + netdev_err(ndev, "sub channel allocate send failed: %d\n", ret); + goto failed; + } + + wait_for_completion(&nvdev->channel_init_wait); + if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { + netdev_err(ndev, "sub channel request failed\n"); + goto failed; + } + + nvdev->num_chn = 1 + + init_packet->msg.v5_msg.subchn_comp.num_subchannels; + + /* wait for all sub channels to open */ + wait_event(nvdev->subchan_open, + atomic_read(&nvdev->open_chn) == nvdev->num_chn); + + /* ignore failues from setting rss parameters, still have channels */ + rndis_filter_set_rss_param(rdev, netvsc_hash_key); + + netif_set_real_num_tx_queues(ndev, nvdev->num_chn); + netif_set_real_num_rx_queues(ndev, nvdev->num_chn); + + rtnl_unlock(); + return; + +failed: + /* fallback to only primary channel */ + for (i = 1; i < nvdev->num_chn; i++) + netif_napi_del(&nvdev->chan_table[i].napi); + + nvdev->max_chn = 1; + nvdev->num_chn = 1; +unlock: + rtnl_unlock(); } struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, @@ -1063,7 +1137,6 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct rndis_device *rndis_device; struct ndis_offload hwcaps; struct ndis_offload_params offloads; - struct nvsp_message *init_packet; struct ndis_recv_scale_cap rsscap; u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); unsigned int gso_max_size = GSO_MAX_SIZE; @@ -1215,9 +1288,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, net_device->num_chn); atomic_set(&net_device->open_chn, 1); - - if (net_device->num_chn == 1) - return net_device; + vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); for (i = 1; i < net_device->num_chn; i++) { ret = netvsc_alloc_recv_comp_ring(net_device, i); @@ -1228,38 +1299,15 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, } } - vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); + for (i = 1; i < net_device->num_chn; i++) + netif_napi_add(net, &net_device->chan_table[i].napi, + netvsc_poll, NAPI_POLL_WEIGHT); - init_packet = &net_device->channel_init_pkt; - memset(init_packet, 0, sizeof(struct nvsp_message)); - init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; - init_packet->msg.v5_msg.subchn_req.op = NVSP_SUBCHANNEL_ALLOCATE; - init_packet->msg.v5_msg.subchn_req.num_subchannels = - net_device->num_chn - 1; - ret = vmbus_sendpacket(dev->channel, init_packet, - sizeof(struct nvsp_message), - (unsigned long)init_packet, - VM_PKT_DATA_INBAND, - VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); - if (ret) - goto out; - - wait_for_completion(&net_device->channel_init_wait); - if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { - ret = -ENODEV; - goto out; - } + if (net_device->num_chn > 1) + schedule_work(&net_device->subchan_work); - net_device->num_chn = 1 + - init_packet->msg.v5_msg.subchn_comp.num_subchannels; - - /* wait for all sub channels to open */ - wait_event(net_device->subchan_open, - atomic_read(&net_device->open_chn) == net_device->num_chn); - - /* ignore failues from setting rss parameters, still have channels */ - rndis_filter_set_rss_param(rndis_device, netvsc_hash_key); out: + /* if unavailable, just proceed with one queue */ if (ret) { net_device->max_chn = 1; net_device->num_chn = 1; @@ -1280,10 +1328,10 @@ void rndis_filter_device_remove(struct hv_device *dev, /* Halt and release the rndis device */ rndis_filter_halt_device(rndis_dev); - kfree(rndis_dev); net_dev->extension = NULL; netvsc_device_remove(dev); + kfree(rndis_dev); } int rndis_filter_open(struct netvsc_device *nvdev) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 340c134..309b88a 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -526,7 +526,7 @@ static void smsc95xx_set_multicast(struct net_device *netdev) static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex, u16 lcladv, u16 rmtadv) { - u32 flow, afc_cfg = 0; + u32 flow = 0, afc_cfg; int ret = smsc95xx_read_reg(dev, AFC_CFG, &afc_cfg); if (ret < 0) @@ -537,20 +537,19 @@ static int smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex, if (cap & FLOW_CTRL_RX) flow = 0xFFFF0002; - else - flow = 0; - if (cap & FLOW_CTRL_TX) + if (cap & FLOW_CTRL_TX) { afc_cfg |= 0xF; - else + flow |= 0xFFFF0000; + } else { afc_cfg &= ~0xF; + } netif_dbg(dev, link, dev->net, "rx pause %s, tx pause %s\n", cap & FLOW_CTRL_RX ? "enabled" : "disabled", cap & FLOW_CTRL_TX ? "enabled" : "disabled"); } else { netif_dbg(dev, link, dev->net, "half duplex\n"); - flow = 0; afc_cfg |= 0xF; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 7e19051..9b243e6 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -957,12 +957,12 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, { const struct ipv6hdr *iph = ipv6_hdr(skb); struct flowi6 fl6 = { + .flowi6_iif = ifindex, + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), - .flowi6_mark = skb->mark, - .flowi6_proto = iph->nexthdr, - .flowi6_iif = ifindex, }; struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 88951b7..95606a2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -200,6 +200,8 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE_MAXARGS 6 + #define SYSCALL_DEFINEx(x, sname, ...) \ SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) diff --git a/include/net/act_api.h b/include/net/act_api.h index 8f3d5d8..b944e0e 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -34,7 +34,6 @@ struct tc_action { struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; - struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie *act_cookie; @@ -50,7 +49,6 @@ struct tc_action { #define tcf_qstats common.tcfa_qstats #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock -#define tcf_rcu common.tcfa_rcu /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 06b4f51..d7d8cba 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -127,7 +127,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - struct net *net, int pos, void *p); + int (*cb_done)(struct sctp_transport *, void *), + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 1060494..b8c86ec 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -153,8 +153,12 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); static inline int sctp_ulpevent_type_enabled(__u16 sn_type, struct sctp_event_subscribe *mask) { + int offset = sn_type - SCTP_SN_TYPE_BASE; char *amask = (char *) mask; - return amask[sn_type - SCTP_SN_TYPE_BASE]; + + if (offset >= sizeof(struct sctp_event_subscribe)) + return 0; + return amask[offset]; } /* Given an event subscription, is this event enabled? */ diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 862575a..4e16c43 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -138,11 +138,11 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \ - 0, map, idx); + 0, map, idx) #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \ - err, map, idx); + err, map, idx) #endif /* _TRACE_XDP_H */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 477b693..799b245 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2292,7 +2292,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } } else { if (insn->src_reg != BPF_REG_0 || insn->off != 0 || - (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) { + (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) || + BPF_CLASS(insn->code) == BPF_ALU64) { verbose("BPF_END uses reserved fields\n"); return -EINVAL; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9c4eef2..696afe7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -565,7 +565,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; - unsigned long args[sys_data->nb_args]; + unsigned long args[SYSCALL_DEFINE_MAXARGS]; } param; int i; diff --git a/net/core/filter.c b/net/core/filter.c index 3a50a9b..24dd33d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2506,21 +2506,19 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct redirect_info *ri = this_cpu_ptr(&redirect_info); const struct bpf_prog *map_owner = ri->map_owner; struct bpf_map *map = ri->map; + struct net_device *fwd = NULL; u32 index = ri->ifindex; - struct net_device *fwd; int err; ri->ifindex = 0; ri->map = NULL; ri->map_owner = NULL; - /* This is really only caused by a deliberately crappy - * BPF program, normally we would never hit that case, - * so no need to inform someone via tracepoints either, - * just bail out. - */ - if (unlikely(map_owner != xdp_prog)) - return -EINVAL; + if (unlikely(map_owner != xdp_prog)) { + err = -EFAULT; + map = NULL; + goto err; + } fwd = __dev_map_lookup_elem(map, index); if (!fwd) { @@ -2576,13 +2574,27 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); + const struct bpf_prog *map_owner = ri->map_owner; + struct bpf_map *map = ri->map; + struct net_device *fwd = NULL; u32 index = ri->ifindex; - struct net_device *fwd; unsigned int len; int err = 0; - fwd = dev_get_by_index_rcu(dev_net(dev), index); ri->ifindex = 0; + ri->map = NULL; + ri->map_owner = NULL; + + if (map) { + if (unlikely(map_owner != xdp_prog)) { + err = -EFAULT; + map = NULL; + goto err; + } + fwd = __dev_map_lookup_elem(map, index); + } else { + fwd = dev_get_by_index_rcu(dev_net(dev), index); + } if (unlikely(!fwd)) { err = -EINVAL; goto err; @@ -2600,10 +2612,12 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, } skb->dev = fwd; - _trace_xdp_redirect(dev, xdp_prog, index); + map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index) + : _trace_xdp_redirect(dev, xdp_prog, index); return 0; err: - _trace_xdp_redirect_err(dev, xdp_prog, index, err); + map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err) + : _trace_xdp_redirect_err(dev, xdp_prog, index, err); return err; } EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 0385dec..7c1ffd6 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -83,10 +83,10 @@ static void est_timer(unsigned long arg) u64 rate, brate; est_fetch_counters(est, &b); - brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log); + brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log); brate -= (est->avbps >> est->ewma_log); - rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log); + rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log); rate -= (est->avpps >> est->ewma_log); write_seqcount_begin(&est->seq); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4089c01..b9c64b4 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -916,7 +916,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, tcp_sk(child)->fastopen_rsk = NULL; } inet_csk_destroy_sock(child); - reqsk_put(req); } struct sock *inet_csk_reqsk_queue_add(struct sock *sk, @@ -987,6 +986,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_hold(child); inet_child_forget(sk, req, child); + reqsk_put(req); bh_unlock_sock(child); local_bh_enable(); sock_put(child); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e558e4f..a599aa8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1207,7 +1207,6 @@ e_inval: void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); - bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) || ipv6_sk_rxinfo(sk); @@ -1221,8 +1220,13 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * (e.g., process binds socket to eth0 for Tx which is * redirected to loopback in the rtable/dst). */ - if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX || l3slave) + struct rtable *rt = skb_rtable(skb); + bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); + + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) pktinfo->ipi_ifindex = inet_iif(skb); + else if (l3slave && rt && rt->rt_iif) + pktinfo->ipi_ifindex = rt->rt_iif; pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e1856bf..e9805ad 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -176,7 +176,7 @@ skip_key_lookup: return cand; t = rcu_dereference(itn->collect_md_tun); - if (t) + if (t && t->dev->flags & IFF_UP) return t; if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5b6690d0..1c839c9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -991,6 +991,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct tcp_skb_cb *tcb; struct tcp_out_options opts; unsigned int tcp_options_size, tcp_header_size; + struct sk_buff *oskb = NULL; struct tcp_md5sig_key *md5; struct tcphdr *th; int err; @@ -998,12 +999,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); tp = tcp_sk(sk); - skb->skb_mstamp = tp->tcp_mstamp; if (clone_it) { TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; - tcp_rate_skb_sent(sk, skb); - + oskb = skb; if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); else @@ -1011,6 +1010,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (unlikely(!skb)) return -ENOBUFS; } + skb->skb_mstamp = tp->tcp_mstamp; inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); @@ -1122,12 +1122,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); - if (likely(err <= 0)) - return err; - - tcp_enter_cwr(sk); - - return net_xmit_eval(err); + if (unlikely(err > 0)) { + tcp_enter_cwr(sk); + err = net_xmit_eval(err); + } + if (!err && oskb) { + oskb->skb_mstamp = tp->tcp_mstamp; + tcp_rate_skb_sent(sk, oskb); + } + return err; } /* This routine just queues the buffer for sending. @@ -2869,10 +2872,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb; - skb->skb_mstamp = tp->tcp_mstamp; nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : -ENOBUFS; + if (!err) + skb->skb_mstamp = tp->tcp_mstamp; } else { err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 10a693a..ae73164 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -171,7 +171,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ } t = rcu_dereference(ip6n->collect_md_tun); - if (t) + if (t && t->dev->flags & IFF_UP) return t; t = rcu_dereference(ip6n->tnls_wc[0]); diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 7ff54db..825b8e0 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -72,10 +72,6 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb) srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - /* make sure it's a Segment Routing header (Routing Type 4) */ - if (srh->type != IPV6_SRCRT_TYPE_4) - return NULL; - len = (srh->hdrlen + 1) << 3; if (!pskb_may_pull(skb, srhoff + len)) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 76cf273..c3aec62 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1112,7 +1112,8 @@ static int ovs_nla_init_match_and_action(struct net *net, if (!a[OVS_FLOW_ATTR_KEY]) { OVS_NLERR(log, "Flow key attribute not present in set flow."); - return -EINVAL; + error = -EINVAL; + goto error; } *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a306974..da6fa82 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -53,10 +53,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a, res->goto_tp = rcu_dereference_bh(chain->filter_chain); } -static void free_tcf(struct rcu_head *head) +/* XXX: For standalone actions, we don't need a RCU grace period either, because + * actions are always connected to filters and filters are already destroyed in + * RCU callbacks, so after a RCU grace period actions are already disconnected + * from filters. Readers later can not find us. + */ +static void free_tcf(struct tc_action *p) { - struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu); - free_percpu(p->cpu_bstats); free_percpu(p->cpu_qstats); @@ -76,11 +79,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); spin_unlock_bh(&idrinfo->lock); gen_kill_estimator(&p->tcfa_rate_est); - /* - * gen_estimator est_timer() might access p->tcfa_lock - * or bstats, wait a RCU grace period before freeing p - */ - call_rcu(&p->tcfa_rcu, free_tcf); + free_tcf(p); } int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) @@ -181,7 +180,7 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, idr_for_each_entry_ext(idr, p, id) { ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) { - module_put(p->ops->owner); + module_put(ops->owner); n_i++; } else if (ret < 0) { goto nla_put_failure; @@ -259,7 +258,7 @@ void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) { if (est) gen_kill_estimator(&a->tcfa_rate_est); - call_rcu(&a->tcfa_rcu, free_tcf); + free_tcf(a); } EXPORT_SYMBOL(tcf_idr_cleanup); @@ -515,13 +514,15 @@ EXPORT_SYMBOL(tcf_action_exec); int tcf_action_destroy(struct list_head *actions, int bind) { + const struct tc_action_ops *ops; struct tc_action *a, *tmp; int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { + ops = a->ops; ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) - module_put(a->ops->owner); + module_put(ops->owner); else if (ret < 0) return ret; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c743f03..0b2219a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -182,7 +182,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, list_add_tail(&chain->list, &block->chain_list); chain->block = block; chain->index = chain_index; - chain->refcnt = 0; + chain->refcnt = 1; return chain; } @@ -194,21 +194,20 @@ static void tcf_chain_flush(struct tcf_chain *chain) RCU_INIT_POINTER(*chain->p_filter_chain, NULL); while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { RCU_INIT_POINTER(chain->filter_chain, tp->next); + tcf_chain_put(chain); tcf_proto_destroy(tp); } } static void tcf_chain_destroy(struct tcf_chain *chain) { - /* May be already removed from the list by the previous call. */ - if (!list_empty(&chain->list)) - list_del_init(&chain->list); + list_del(&chain->list); + kfree(chain); +} - /* There might still be a reference held when we got here from - * tcf_block_put. Wait for the user to drop reference before free. - */ - if (!chain->refcnt) - kfree(chain); +static void tcf_chain_hold(struct tcf_chain *chain) +{ + ++chain->refcnt; } struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -217,24 +216,19 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, struct tcf_chain *chain; list_for_each_entry(chain, &block->chain_list, list) { - if (chain->index == chain_index) - goto incref; + if (chain->index == chain_index) { + tcf_chain_hold(chain); + return chain; + } } - chain = create ? tcf_chain_create(block, chain_index) : NULL; -incref: - if (chain) - chain->refcnt++; - return chain; + return create ? tcf_chain_create(block, chain_index) : NULL; } EXPORT_SYMBOL(tcf_chain_get); void tcf_chain_put(struct tcf_chain *chain) { - /* Destroy unused chain, with exception of chain 0, which is the - * default one and has to be always present. - */ - if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0) + if (--chain->refcnt == 0) tcf_chain_destroy(chain); } EXPORT_SYMBOL(tcf_chain_put); @@ -279,10 +273,31 @@ void tcf_block_put(struct tcf_block *block) if (!block) return; - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { + /* XXX: Standalone actions are not allowed to jump to any chain, and + * bound actions should be all removed after flushing. However, + * filters are destroyed in RCU callbacks, we have to hold the chains + * first, otherwise we would always race with RCU callbacks on this list + * without proper locking. + */ + + /* Wait for existing RCU callbacks to cool down. */ + rcu_barrier(); + + /* Hold a refcnt for all chains, except 0, in case they are gone. */ + list_for_each_entry(chain, &block->chain_list, list) + if (chain->index) + tcf_chain_hold(chain); + + /* No race on the list, because no chain could be destroyed. */ + list_for_each_entry(chain, &block->chain_list, list) tcf_chain_flush(chain); - tcf_chain_destroy(chain); - } + + /* Wait for RCU callbacks to release the reference count. */ + rcu_barrier(); + + /* At this point, all the chains should have refcnt == 1. */ + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_put(chain); kfree(block); } EXPORT_SYMBOL(tcf_block_put); @@ -360,6 +375,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, rcu_assign_pointer(*chain->p_filter_chain, tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); + tcf_chain_hold(chain); } static void tcf_chain_tp_remove(struct tcf_chain *chain, @@ -371,6 +387,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, if (chain->p_filter_chain && tp == chain->filter_chain) RCU_INIT_POINTER(*chain->p_filter_chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); + tcf_chain_put(chain); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 98c05db..b1f6ed4 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -389,7 +389,7 @@ static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) if ((data->hgenerator += 0x10000) == 0) data->hgenerator = 0x10000; h = data->hgenerator|salt; - if (rsvp_get(tp, h) == 0) + if (!rsvp_get(tp, h)) return h; } return 0; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index e99518e..22ed01a 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -279,9 +279,11 @@ out: return err; } -static int sctp_sock_dump(struct sock *sk, void *p) +static int sctp_sock_dump(struct sctp_transport *tsp, void *p) { + struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; @@ -289,9 +291,7 @@ static int sctp_sock_dump(struct sock *sk, void *p) int err = 0; lock_sock(sk); - if (!sctp_sk(sk)->ep) - goto release; - list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) { + list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -309,7 +309,6 @@ static int sctp_sock_dump(struct sock *sk, void *p) cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, commp->net_admin) < 0) { - cb->args[3] = 1; err = 1; goto release; } @@ -327,40 +326,30 @@ next: cb->args[4]++; } cb->args[1] = 0; - cb->args[2]++; cb->args[3] = 0; cb->args[4] = 0; release: release_sock(sk); - sock_put(sk); return err; } -static int sctp_get_sock(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_transport *tsp, void *p) { struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; - struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; struct sctp_association *assoc = list_entry(ep->asocs.next, struct sctp_association, asocs); /* find the ep only once through the transports by this condition */ if (tsp->asoc != assoc) - goto out; + return 0; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) - goto out; - - sock_hold(sk); - cb->args[5] = (long)sk; + return 0; return 1; - -out: - cb->args[2]++; - return 0; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) @@ -503,12 +492,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; -next: - cb->args[5] = 0; - sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp); - - if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp)) - goto next; + sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, + net, (int *)&cb->args[2], &commp); done: cb->args[1] = cb->args[4]; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1b00a1e..d4730ad 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4658,29 +4658,39 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - struct net *net, int pos, void *p) { + int (*cb_done)(struct sctp_transport *, void *), + struct net *net, int *pos, void *p) { struct rhashtable_iter hti; - void *obj; - int err; - - err = sctp_transport_walk_start(&hti); - if (err) - return err; + struct sctp_transport *tsp; + int ret; - obj = sctp_transport_get_idx(net, &hti, pos + 1); - for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) { - struct sctp_transport *transport = obj; +again: + ret = sctp_transport_walk_start(&hti); + if (ret) + return ret; - if (!sctp_transport_hold(transport)) + tsp = sctp_transport_get_idx(net, &hti, *pos + 1); + for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { + if (!sctp_transport_hold(tsp)) continue; - err = cb(transport, p); - sctp_transport_put(transport); - if (err) + ret = cb(tsp, p); + if (ret) break; + (*pos)++; + sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); - return err; + if (ret) { + if (cb_done && !cb_done(tsp, p)) { + (*pos)++; + sctp_transport_put(tsp); + goto again; + } + sctp_transport_put(tsp); + } + + return ret; } EXPORT_SYMBOL_GPL(sctp_for_each_transport); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fa596fa..7d80040 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -639,7 +639,7 @@ sendpage_end: return ret; } -void tls_sw_free_resources(struct sock *sk) +static void tls_sw_free_resources(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 8eb0995..26f3250 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6629,6 +6629,22 @@ static struct bpf_test tests[] = { .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, + { + "invalid 64-bit BPF_END", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 0), + { + .code = BPF_ALU64 | BPF_END | BPF_TO_LE, + .dst_reg = BPF_REG_0, + .src_reg = 0, + .off = 0, + .imm = 32, + }, + BPF_EXIT_INSN(), + }, + .errstr = "BPF_END uses reserved fields", + .result = REJECT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) |