summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/can/rcar_can.txt7
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt2
-rw-r--r--Documentation/networking/00-INDEX2
-rw-r--r--Documentation/networking/can.rst1437
-rw-r--r--Documentation/networking/can.txt1308
-rw-r--r--Documentation/networking/index.rst1
-rw-r--r--Documentation/networking/pktgen.txt19
-rw-r--r--Documentation/networking/xfrm_device.txt3
-rw-r--r--Documentation/virtual/kvm/api.txt46
-rw-r--r--Documentation/x86/pti.txt2
-rw-r--r--MAINTAINERS15
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/kernel/sys_sio.c35
-rw-r--r--arch/alpha/lib/ev6-memset.S12
-rw-r--r--arch/arm/boot/dts/imx6q-b450v3.dts52
-rw-r--r--arch/arm/boot/dts/imx6q-b650v3.dts52
-rw-r--r--arch/arm/boot/dts/imx6q-b850v3.dts75
-rw-r--r--arch/arm/boot/dts/imx6q-bx50v3.dtsi62
-rw-r--r--arch/arm64/kvm/handle_exit.c4
-rw-r--r--arch/mips/Kconfig12
-rw-r--r--arch/mips/Kconfig.debug14
-rw-r--r--arch/mips/ar7/platform.c2
-rw-r--r--arch/mips/ath25/devices.c2
-rw-r--r--arch/mips/kernel/mips-cm.c1
-rw-r--r--arch/mips/lib/Makefile3
-rw-r--r--arch/mips/lib/libgcc.h17
-rw-r--r--arch/mips/lib/multi3.c54
-rw-r--r--arch/mips/mm/uasm-micromips.c2
-rw-r--r--arch/mips/ralink/timer.c4
-rw-r--r--arch/mips/rb532/Makefile4
-rw-r--r--arch/mips/rb532/devices.c4
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h25
-rw-r--r--arch/powerpc/kvm/powerpc.c131
-rw-r--r--arch/s390/include/asm/kvm_host.h3
-rw-r--r--arch/s390/include/uapi/asm/kvm.h5
-rw-r--r--arch/s390/kvm/kvm-s390.c12
-rw-r--r--arch/s390/kvm/vsie.c10
-rw-r--r--arch/sparc/crypto/Makefile2
-rw-r--r--arch/x86/entry/entry_64.S2
-rw-r--r--arch/x86/include/asm/nospec-branch.h10
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c5
-rw-r--r--arch/x86/kernel/ftrace_64.S24
-rw-r--r--arch/x86/kernel/kprobes/opt.c23
-rw-r--r--arch/x86/kernel/process.c25
-rw-r--r--arch/x86/kernel/unwind_orc.c48
-rw-r--r--arch/x86/kernel/vmlinux.lds.S6
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/retpoline.S5
-rw-r--r--arch/x86/mm/mem_encrypt.c2
-rw-r--r--arch/x86/pci/fixup.c32
-rw-r--r--drivers/bluetooth/btbcm.c1
-rw-r--r--drivers/bluetooth/btintel.c155
-rw-r--r--drivers/bluetooth/btintel.h33
-rw-r--r--drivers/bluetooth/btusb.c133
-rw-r--r--drivers/bluetooth/hci_bcm.c5
-rw-r--r--drivers/bluetooth/hci_intel.c186
-rw-r--r--drivers/md/dm-crypt.c20
-rw-r--r--drivers/md/dm-integrity.c49
-rw-r--r--drivers/md/dm-thin-metadata.c6
-rw-r--r--drivers/md/persistent-data/dm-btree.c19
-rw-r--r--drivers/net/can/dev.c2
-rw-r--r--drivers/net/can/vcan.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c3
-rw-r--r--drivers/net/ethernet/chelsio/Kconfig1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/Makefile3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c51
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h13
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c34
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c9
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c58
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h16
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/adapter.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/sge.c15
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c28
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c5
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c6
-rw-r--r--drivers/net/ethernet/ibm/emac/emac.h4
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c73
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h2
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_iov.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c54
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h8
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.c36
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c185
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c141
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h6
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c54
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h8
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c54
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_type.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h1
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c13
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c35
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h1
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c32
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c72
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c9
-rw-r--r--drivers/net/ethernet/intel/ixgbe/Makefile1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h33
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c39
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c941
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h93
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c59
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h22
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf.h16
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c362
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/vf.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c10
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c4
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c7
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c13
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c31
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-mac.h3
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c7
-rw-r--r--drivers/net/ethernet/rocker/rocker_ofdpa.c1
-rw-r--r--drivers/net/ethernet/sfc/ef10.c158
-rw-r--r--drivers/net/ethernet/sfc/efx.c11
-rw-r--r--drivers/net/ethernet/sfc/farch.c26
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h21
-rw-r--r--drivers/net/ethernet/sfc/nic.h4
-rw-r--r--drivers/net/ethernet/sfc/ptp.c368
-rw-r--r--drivers/net/ethernet/sfc/tx.c21
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/enh_desc.c2
-rw-r--r--drivers/net/netdevsim/Makefile6
-rw-r--r--drivers/net/netdevsim/bpf.c38
-rw-r--r--drivers/net/netdevsim/netdevsim.h28
-rw-r--r--drivers/net/phy/sfp-bus.c2
-rw-r--r--drivers/net/ppp/pppoe.c11
-rw-r--r--drivers/net/usb/usbnet.c8
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c2
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c17
-rw-r--r--drivers/tty/serdev/core.c12
-rw-r--r--drivers/tty/serdev/serdev-ttyport.c24
-rw-r--r--drivers/vhost/vhost.c6
-rw-r--r--fs/nfsd/auth.c6
-rw-r--r--fs/orangefs/devorangefs-req.c3
-rw-r--r--fs/orangefs/waitqueue.c4
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/inetdevice.h2
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/netdevice.h12
-rw-r--r--include/linux/serdev.h10
-rw-r--r--include/linux/swapops.h21
-rw-r--r--include/linux/vermagic.h8
-rw-r--r--include/net/erspan.h127
-rw-r--r--include/net/ipv6.h1
-rw-r--r--include/net/pkt_cls.h40
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sch_generic.h3
-rw-r--r--include/net/tc_act/tc_csum.h16
-rw-r--r--include/net/wext.h4
-rw-r--r--include/net/xfrm.h12
-rw-r--r--include/uapi/linux/erspan.h52
-rw-r--r--include/uapi/linux/kvm.h4
-rw-r--r--include/uapi/linux/openvswitch.h1
-rw-r--r--kernel/irq/matrix.c20
-rw-r--r--kernel/trace/ftrace.c29
-rw-r--r--kernel/trace/trace.c34
-rw-r--r--kernel/trace/trace_events_trigger.c13
-rw-r--r--kernel/trace/trace_functions.c49
-rw-r--r--mm/page_vma_mapped.c66
-rw-r--r--net/can/Kconfig2
-rw-r--r--net/core/dev.c22
-rw-r--r--net/core/dev_ioctl.c132
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/net_namespace.c62
-rw-r--r--net/core/pktgen.c266
-rw-r--r--net/dsa/dsa2.c7
-rw-r--r--net/dsa/dsa_priv.h4
-rw-r--r--net/dsa/legacy.c4
-rw-r--r--net/dsa/port.c103
-rw-r--r--net/ipv4/af_inet.c28
-rw-r--r--net/ipv4/devinet.c57
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/fib_frontend.c8
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/ip_gre.c38
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ip_tunnel.c13
-rw-r--r--net/ipv4/ipconfig.c47
-rw-r--r--net/ipv4/raw.c15
-rw-r--r--net/ipv4/tcp_offload.c3
-rw-r--r--net/ipv4/udp.c15
-rw-r--r--net/ipv4/udp_offload.c3
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c1
-rw-r--r--net/ipv6/esp6_offload.c3
-rw-r--r--net/ipv6/ip6_gre.c36
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/route.c146
-rw-r--r--net/ipv6/tcpv6_offload.c3
-rw-r--r--net/ipv6/udp_offload.c3
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c1
-rw-r--r--net/kcm/kcmsock.c25
-rw-r--r--net/openvswitch/flow_netlink.c52
-rw-r--r--net/rds/tcp.c5
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_send.c4
-rw-r--r--net/sched/act_csum.c66
-rw-r--r--net/sched/cls_api.c15
-rw-r--r--net/sched/cls_basic.c2
-rw-r--r--net/sched/cls_bpf.c24
-rw-r--r--net/sched/cls_cgroup.c3
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/cls_flower.c24
-rw-r--r--net/sched/cls_fw.c2
-rw-r--r--net/sched/cls_matchall.c13
-rw-r--r--net/sched/cls_route.c2
-rw-r--r--net/sched/cls_rsvp.h2
-rw-r--r--net/sched/cls_tcindex.c3
-rw-r--r--net/sched/cls_u32.c42
-rw-r--r--net/sched/em_nbyte.c2
-rw-r--r--net/sctp/offload.c3
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/smc/af_smc.c207
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_cdc.c40
-rw-r--r--net/smc/smc_cdc.h1
-rw-r--r--net/smc/smc_close.c206
-rw-r--r--net/smc/smc_close.h1
-rw-r--r--net/smc/smc_core.c17
-rw-r--r--net/smc/smc_diag.c6
-rw-r--r--net/smc/smc_ib.c38
-rw-r--r--net/smc/smc_tx.c23
-rw-r--r--net/smc/smc_wr.c50
-rw-r--r--net/smc/smc_wr.h2
-rw-r--r--net/socket.c271
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/wireless/wext-core.c13
-rw-r--r--net/xfrm/xfrm_device.c12
-rw-r--r--net/xfrm/xfrm_replay.c2
-rw-r--r--net/xfrm/xfrm_state.c12
-rw-r--r--net/xfrm/xfrm_user.c18
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py216
-rw-r--r--virt/kvm/arm/mmu.c2
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c8
-rw-r--r--virt/kvm/arm/vgic/vgic-v4.c2
266 files changed, 7184 insertions, 3707 deletions
diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 06bb7cc..94a7f33 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -2,7 +2,9 @@ Renesas R-Car CAN controller Device Tree Bindings
-------------------------------------------------
Required properties:
-- compatible: "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
+- compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
+ "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
+ "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
"renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
"renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
"renesas,can-r8a7791" if CAN controller is a part of R8A7791 SoC.
@@ -12,7 +14,8 @@ Required properties:
"renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
"renesas,can-r8a7796" if CAN controller is a part of R8A7796 SoC.
"renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
- "renesas,rcar-gen2-can" for a generic R-Car Gen2 compatible device.
+ "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1
+ compatible device.
"renesas,rcar-gen3-can" for a generic R-Car Gen3 compatible device.
When compatible with the generic version, nodes must list the
SoC-specific version corresponding to the platform first
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
index 4ccb2cd..d096cf4 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
@@ -195,4 +195,4 @@ External interrupts:
fsl,mpc5200-mscan nodes
-----------------------
-See file can.txt in this directory.
+See file Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index f5d642c..2b89d91 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -36,8 +36,6 @@ bonding.txt
- Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
bridge.txt
- where to get user space programs for ethernet bridging with Linux.
-can.txt
- - documentation on CAN protocol family.
cdc_mbim.txt
- 3G/LTE USB modem (Mobile Broadband Interface Model)
checksum-offloads.txt
diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
new file mode 100644
index 0000000..d23c51a
--- /dev/null
+++ b/Documentation/networking/can.rst
@@ -0,0 +1,1437 @@
+===================================
+SocketCAN - Controller Area Network
+===================================
+
+Overview / What is SocketCAN
+============================
+
+The socketcan package is an implementation of CAN protocols
+(Controller Area Network) for Linux. CAN is a networking technology
+which has widespread use in automation, embedded devices, and
+automotive fields. While there have been other CAN implementations
+for Linux based on character devices, SocketCAN uses the Berkeley
+socket API, the Linux network stack and implements the CAN device
+drivers as network interfaces. The CAN socket API has been designed
+as similar as possible to the TCP/IP protocols to allow programmers,
+familiar with network programming, to easily learn how to use CAN
+sockets.
+
+
+.. _socketcan-motivation:
+
+Motivation / Why Using the Socket API
+=====================================
+
+There have been CAN implementations for Linux before SocketCAN so the
+question arises, why we have started another project. Most existing
+implementations come as a device driver for some CAN hardware, they
+are based on character devices and provide comparatively little
+functionality. Usually, there is only a hardware-specific device
+driver which provides a character device interface to send and
+receive raw CAN frames, directly to/from the controller hardware.
+Queueing of frames and higher-level transport protocols like ISO-TP
+have to be implemented in user space applications. Also, most
+character-device implementations support only one single process to
+open the device at a time, similar to a serial interface. Exchanging
+the CAN controller requires employment of another device driver and
+often the need for adaption of large parts of the application to the
+new driver's API.
+
+SocketCAN was designed to overcome all of these limitations. A new
+protocol family has been implemented which provides a socket interface
+to user space applications and which builds upon the Linux network
+layer, enabling use all of the provided queueing functionality. A device
+driver for CAN controller hardware registers itself with the Linux
+network layer as a network device, so that CAN frames from the
+controller can be passed up to the network layer and on to the CAN
+protocol family module and also vice-versa. Also, the protocol family
+module provides an API for transport protocol modules to register, so
+that any number of transport protocols can be loaded or unloaded
+dynamically. In fact, the can core module alone does not provide any
+protocol and cannot be used without loading at least one additional
+protocol module. Multiple sockets can be opened at the same time,
+on different or the same protocol module and they can listen/send
+frames on different or the same CAN IDs. Several sockets listening on
+the same interface for frames with the same CAN ID are all passed the
+same received matching CAN frames. An application wishing to
+communicate using a specific transport protocol, e.g. ISO-TP, just
+selects that protocol when opening the socket, and then can read and
+write application data byte streams, without having to deal with
+CAN-IDs, frames, etc.
+
+Similar functionality visible from user-space could be provided by a
+character device, too, but this would lead to a technically inelegant
+solution for a couple of reasons:
+
+* **Intricate usage:** Instead of passing a protocol argument to
+ socket(2) and using bind(2) to select a CAN interface and CAN ID, an
+ application would have to do all these operations using ioctl(2)s.
+
+* **Code duplication:** A character device cannot make use of the Linux
+ network queueing code, so all that code would have to be duplicated
+ for CAN networking.
+
+* **Abstraction:** In most existing character-device implementations, the
+ hardware-specific device driver for a CAN controller directly
+ provides the character device for the application to work with.
+ This is at least very unusual in Unix systems for both, char and
+ block devices. For example you don't have a character device for a
+ certain UART of a serial interface, a certain sound chip in your
+ computer, a SCSI or IDE controller providing access to your hard
+ disk or tape streamer device. Instead, you have abstraction layers
+ which provide a unified character or block device interface to the
+ application on the one hand, and a interface for hardware-specific
+ device drivers on the other hand. These abstractions are provided
+ by subsystems like the tty layer, the audio subsystem or the SCSI
+ and IDE subsystems for the devices mentioned above.
+
+ The easiest way to implement a CAN device driver is as a character
+ device without such a (complete) abstraction layer, as is done by most
+ existing drivers. The right way, however, would be to add such a
+ layer with all the functionality like registering for certain CAN
+ IDs, supporting several open file descriptors and (de)multiplexing
+ CAN frames between them, (sophisticated) queueing of CAN frames, and
+ providing an API for device drivers to register with. However, then
+ it would be no more difficult, or may be even easier, to use the
+ networking framework provided by the Linux kernel, and this is what
+ SocketCAN does.
+
+The use of the networking framework of the Linux kernel is just the
+natural and most appropriate way to implement CAN for Linux.
+
+
+.. _socketcan-concept:
+
+SocketCAN Concept
+=================
+
+As described in :ref:`socketcan-motivation` the main goal of SocketCAN is to
+provide a socket interface to user space applications which builds
+upon the Linux network layer. In contrast to the commonly known
+TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!)
+medium that has no MAC-layer addressing like ethernet. The CAN-identifier
+(can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs
+have to be chosen uniquely on the bus. When designing a CAN-ECU
+network the CAN-IDs are mapped to be sent by a specific ECU.
+For this reason a CAN-ID can be treated best as a kind of source address.
+
+
+.. _socketcan-receive-lists:
+
+Receive Lists
+-------------
+
+The network transparent access of multiple applications leads to the
+problem that different applications may be interested in the same
+CAN-IDs from the same CAN network interface. The SocketCAN core
+module - which implements the protocol family CAN - provides several
+high efficient receive lists for this reason. If e.g. a user space
+application opens a CAN RAW socket, the raw protocol module itself
+requests the (range of) CAN-IDs from the SocketCAN core that are
+requested by the user. The subscription and unsubscription of
+CAN-IDs can be done for specific CAN interfaces or for all(!) known
+CAN interfaces with the can_rx_(un)register() functions provided to
+CAN protocol modules by the SocketCAN core (see :ref:`socketcan-core-module`).
+To optimize the CPU usage at runtime the receive lists are split up
+into several specific lists per device that match the requested
+filter complexity for a given use-case.
+
+
+.. _socketcan-local-loopback1:
+
+Local Loopback of Sent Frames
+-----------------------------
+
+As known from other networking concepts the data exchanging
+applications may run on the same or different nodes without any
+change (except for the according addressing information):
+
+.. code::
+
+ ___ ___ ___ _______ ___
+ | _ | | _ | | _ | | _ _ | | _ |
+ ||A|| ||B|| ||C|| ||A| |B|| ||C||
+ |___| |___| |___| |_______| |___|
+ | | | | |
+ -----------------(1)- CAN bus -(2)---------------
+
+To ensure that application A receives the same information in the
+example (2) as it would receive in example (1) there is need for
+some kind of local loopback of the sent CAN frames on the appropriate
+node.
+
+The Linux network devices (by default) just can handle the
+transmission and reception of media dependent frames. Due to the
+arbitration on the CAN bus the transmission of a low prio CAN-ID
+may be delayed by the reception of a high prio CAN frame. To
+reflect the correct [*]_ traffic on the node the loopback of the sent
+data has to be performed right after a successful transmission. If
+the CAN network interface is not capable of performing the loopback for
+some reason the SocketCAN core can do this task as a fallback solution.
+See :ref:`socketcan-local-loopback1` for details (recommended).
+
+The loopback functionality is enabled by default to reflect standard
+networking behaviour for CAN applications. Due to some requests from
+the RT-SocketCAN group the loopback optionally may be disabled for each
+separate socket. See sockopts from the CAN RAW sockets in :ref:`socketcan-raw-sockets`.
+
+.. [*] you really like to have this when you're running analyser
+ tools like 'candump' or 'cansniffer' on the (same) node.
+
+
+.. _socketcan-network-problem-notifications:
+
+Network Problem Notifications
+-----------------------------
+
+The use of the CAN bus may lead to several problems on the physical
+and media access control layer. Detecting and logging of these lower
+layer problems is a vital requirement for CAN users to identify
+hardware issues on the physical transceiver layer as well as
+arbitration problems and error frames caused by the different
+ECUs. The occurrence of detected errors are important for diagnosis
+and have to be logged together with the exact timestamp. For this
+reason the CAN interface driver can generate so called Error Message
+Frames that can optionally be passed to the user application in the
+same way as other CAN frames. Whenever an error on the physical layer
+or the MAC layer is detected (e.g. by the CAN controller) the driver
+creates an appropriate error message frame. Error messages frames can
+be requested by the user application using the common CAN filter
+mechanisms. Inside this filter definition the (interested) type of
+errors may be selected. The reception of error messages is disabled
+by default. The format of the CAN error message frame is briefly
+described in the Linux header file "include/uapi/linux/can/error.h".
+
+
+How to use SocketCAN
+====================
+
+Like TCP/IP, you first need to open a socket for communicating over a
+CAN network. Since SocketCAN implements a new protocol family, you
+need to pass PF_CAN as the first argument to the socket(2) system
+call. Currently, there are two CAN protocols to choose from, the raw
+socket protocol and the broadcast manager (BCM). So to open a socket,
+you would write::
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+and::
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+respectively. After the successful creation of the socket, you would
+normally use the bind(2) system call to bind the socket to a CAN
+interface (which is different from TCP/IP due to different addressing
+- see :ref:`socketcan-concept`). After binding (CAN_RAW) or connecting (CAN_BCM)
+the socket, you can read(2) and write(2) from/to the socket or use
+send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
+on the socket as usual. There are also CAN specific socket options
+described below.
+
+The basic CAN frame structure and the sockaddr structure are defined
+in include/linux/can.h:
+
+.. code-block:: C
+
+ struct can_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */
+ __u8 __pad; /* padding */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
+ __u8 data[8] __attribute__((aligned(8)));
+ };
+
+The alignment of the (linear) payload data[] to a 64bit boundary
+allows the user to define their own structs and unions to easily access
+the CAN payload. There is no given byteorder on the CAN bus by
+default. A read(2) system call on a CAN_RAW socket transfers a
+struct can_frame to the user space.
+
+The sockaddr_can structure has an interface index like the
+PF_PACKET socket, that also binds to a specific interface:
+
+.. code-block:: C
+
+ struct sockaddr_can {
+ sa_family_t can_family;
+ int can_ifindex;
+ union {
+ /* transport protocol class address info (e.g. ISOTP) */
+ struct { canid_t rx_id, tx_id; } tp;
+
+ /* reserved for future CAN protocols address information */
+ } can_addr;
+ };
+
+To determine the interface index an appropriate ioctl() has to
+be used (example for CAN_RAW sockets without error checking):
+
+.. code-block:: C
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+ strcpy(ifr.ifr_name, "can0" );
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ bind(s, (struct sockaddr *)&addr, sizeof(addr));
+
+ (..)
+
+To bind a socket to all(!) CAN interfaces the interface index must
+be 0 (zero). In this case the socket receives CAN frames from every
+enabled CAN interface. To determine the originating CAN interface
+the system call recvfrom(2) may be used instead of read(2). To send
+on a socket that is bound to 'any' interface sendto(2) is needed to
+specify the outgoing interface.
+
+Reading CAN frames from a bound CAN_RAW socket (see above) consists
+of reading a struct can_frame:
+
+.. code-block:: C
+
+ struct can_frame frame;
+
+ nbytes = read(s, &frame, sizeof(struct can_frame));
+
+ if (nbytes < 0) {
+ perror("can raw socket read");
+ return 1;
+ }
+
+ /* paranoid check ... */
+ if (nbytes < sizeof(struct can_frame)) {
+ fprintf(stderr, "read: incomplete CAN frame\n");
+ return 1;
+ }
+
+ /* do something with the received CAN frame */
+
+Writing CAN frames can be done similarly, with the write(2) system call::
+
+ nbytes = write(s, &frame, sizeof(struct can_frame));
+
+When the CAN interface is bound to 'any' existing CAN interface
+(addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the
+information about the originating CAN interface is needed:
+
+.. code-block:: C
+
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ socklen_t len = sizeof(addr);
+ struct can_frame frame;
+
+ nbytes = recvfrom(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, &len);
+
+ /* get interface name of the received CAN frame */
+ ifr.ifr_ifindex = addr.can_ifindex;
+ ioctl(s, SIOCGIFNAME, &ifr);
+ printf("Received a CAN frame from interface %s", ifr.ifr_name);
+
+To write CAN frames on sockets bound to 'any' CAN interface the
+outgoing interface has to be defined certainly:
+
+.. code-block:: C
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+ addr.can_ifindex = ifr.ifr_ifindex;
+ addr.can_family = AF_CAN;
+
+ nbytes = sendto(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, sizeof(addr));
+
+An accurate timestamp can be obtained with an ioctl(2) call after reading
+a message from the socket:
+
+.. code-block:: C
+
+ struct timeval tv;
+ ioctl(s, SIOCGSTAMP, &tv);
+
+The timestamp has a resolution of one microsecond and is set automatically
+at the reception of a CAN frame.
+
+Remark about CAN FD (flexible data rate) support:
+
+Generally the handling of CAN FD is very similar to the formerly described
+examples. The new CAN FD capable CAN controllers support two different
+bitrates for the arbitration phase and the payload phase of the CAN FD frame
+and up to 64 bytes of payload. This extended payload length breaks all the
+kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
+bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
+the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
+switches the socket into a mode that allows the handling of CAN FD frames
+and (legacy) CAN frames simultaneously (see :ref:`socketcan-rawfd`).
+
+The struct canfd_frame is defined in include/linux/can.h:
+
+.. code-block:: C
+
+ struct canfd_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 len; /* frame payload length in byte (0 .. 64) */
+ __u8 flags; /* additional flags for CAN FD */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
+ __u8 data[64] __attribute__((aligned(8)));
+ };
+
+The struct canfd_frame and the existing struct can_frame have the can_id,
+the payload length and the payload data at the same offset inside their
+structures. This allows to handle the different structures very similar.
+When the content of a struct can_frame is copied into a struct canfd_frame
+all structure elements can be used as-is - only the data[] becomes extended.
+
+When introducing the struct canfd_frame it turned out that the data length
+code (DLC) of the struct can_frame was used as a length information as the
+length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
+the easy handling of the length information the canfd_frame.len element
+contains a plain length value from 0 .. 64. So both canfd_frame.len and
+can_frame.can_dlc are equal and contain a length information and no DLC.
+For details about the distinction of CAN and CAN FD capable devices and
+the mapping to the bus-relevant data length code (DLC), see :ref:`socketcan-can-fd-driver`.
+
+The length of the two CAN(FD) frame structures define the maximum transfer
+unit (MTU) of the CAN(FD) network interface and skbuff data length. Two
+definitions are specified for CAN specific MTUs in include/linux/can.h:
+
+.. code-block:: C
+
+ #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame
+ #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
+
+
+.. _socketcan-raw-sockets:
+
+RAW Protocol Sockets with can_filters (SOCK_RAW)
+------------------------------------------------
+
+Using CAN_RAW sockets is extensively comparable to the commonly
+known access to CAN character devices. To meet the new possibilities
+provided by the multi user SocketCAN approach, some reasonable
+defaults are set at RAW socket binding time:
+
+- The filters are set to exactly one filter receiving everything
+- The socket only receives valid data frames (=> no error message frames)
+- The loopback of sent CAN frames is enabled (see :ref:`socketcan-local-loopback2`)
+- The socket does not receive its own sent frames (in loopback mode)
+
+These default settings may be changed before or after binding the socket.
+To use the referenced definitions of the socket options for CAN_RAW
+sockets, include <linux/can/raw.h>.
+
+
+.. _socketcan-rawfilter:
+
+RAW socket option CAN_RAW_FILTER
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The reception of CAN frames using CAN_RAW sockets can be controlled
+by defining 0 .. n filters with the CAN_RAW_FILTER socket option.
+
+The CAN filter structure is defined in include/linux/can.h:
+
+.. code-block:: C
+
+ struct can_filter {
+ canid_t can_id;
+ canid_t can_mask;
+ };
+
+A filter matches, when:
+
+.. code-block:: C
+
+ <received_can_id> & mask == can_id & mask
+
+which is analogous to known CAN controllers hardware filter semantics.
+The filter can be inverted in this semantic, when the CAN_INV_FILTER
+bit is set in can_id element of the can_filter structure. In
+contrast to CAN controller hardware filters the user may set 0 .. n
+receive filters for each open socket separately:
+
+.. code-block:: C
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+ rfilter[1].can_id = 0x200;
+ rfilter[1].can_mask = 0x700;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+To disable the reception of CAN frames on the selected CAN_RAW socket:
+
+.. code-block:: C
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
+
+To set the filters to zero filters is quite obsolete as to not read
+data causes the raw socket to discard the received CAN frames. But
+having this 'send only' use-case we may remove the receive list in the
+Kernel to save a little (really a very little!) CPU usage.
+
+CAN Filter Usage Optimisation
+.............................
+
+The CAN filters are processed in per-device filter lists at CAN frame
+reception time. To reduce the number of checks that need to be performed
+while walking through the filter lists the CAN core provides an optimized
+filter handling when the filter subscription focusses on a single CAN ID.
+
+For the possible 2048 SFF CAN identifiers the identifier is used as an index
+to access the corresponding subscription list without any further checks.
+For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
+hash function to retrieve the EFF table index.
+
+To benefit from the optimized filters for single CAN identifiers the
+CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
+with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
+can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
+subscribed. E.g. in the example from above:
+
+.. code-block:: C
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+
+both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
+
+To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
+filter has to be defined in this way to benefit from the optimized filters:
+
+.. code-block:: C
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
+ rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG;
+ rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+
+RAW Socket Option CAN_RAW_ERR_FILTER
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As described in :ref:`socketcan-network-problem-notifications` the CAN interface driver can generate so
+called Error Message Frames that can optionally be passed to the user
+application in the same way as other CAN frames. The possible
+errors are divided into different error classes that may be filtered
+using the appropriate error mask. To register for every possible
+error condition CAN_ERR_MASK can be used as value for the error mask.
+The values for the error mask are defined in linux/can/error.h:
+
+.. code-block:: C
+
+ can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF );
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
+ &err_mask, sizeof(err_mask));
+
+
+RAW Socket Option CAN_RAW_LOOPBACK
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To meet multi user needs the local loopback is enabled by default
+(see :ref:`socketcan-local-loopback1` for details). But in some embedded use-cases
+(e.g. when only one application uses the CAN bus) this loopback
+functionality can be disabled (separately for each socket):
+
+.. code-block:: C
+
+ int loopback = 0; /* 0 = disabled, 1 = enabled (default) */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));
+
+
+RAW socket option CAN_RAW_RECV_OWN_MSGS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When the local loopback is enabled, all the sent CAN frames are
+looped back to the open CAN sockets that registered for the CAN
+frames' CAN-ID on this given interface to meet the multi user
+needs. The reception of the CAN frames on the same socket that was
+sending the CAN frame is assumed to be unwanted and therefore
+disabled by default. This default behaviour may be changed on
+demand:
+
+.. code-block:: C
+
+ int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+
+.. _socketcan-rawfd:
+
+RAW Socket Option CAN_RAW_FD_FRAMES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+CAN FD support in CAN_RAW sockets can be enabled with a new socket option
+CAN_RAW_FD_FRAMES which is off by default. When the new socket option is
+not supported by the CAN_RAW socket (e.g. on older kernels), switching the
+CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT.
+
+Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames
+and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames
+when reading from the socket:
+
+.. code-block:: C
+
+ CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed
+ CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default)
+
+Example:
+
+.. code-block:: C
+
+ [ remember: CANFD_MTU == sizeof(struct canfd_frame) ]
+
+ struct canfd_frame cfd;
+
+ nbytes = read(s, &cfd, CANFD_MTU);
+
+ if (nbytes == CANFD_MTU) {
+ printf("got CAN FD frame with length %d\n", cfd.len);
+ /* cfd.flags contains valid data */
+ } else if (nbytes == CAN_MTU) {
+ printf("got legacy CAN frame with length %d\n", cfd.len);
+ /* cfd.flags is undefined */
+ } else {
+ fprintf(stderr, "read: invalid CAN(FD) frame\n");
+ return 1;
+ }
+
+ /* the content can be handled independently from the received MTU size */
+
+ printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len);
+ for (i = 0; i < cfd.len; i++)
+ printf("%02X ", cfd.data[i]);
+
+When reading with size CANFD_MTU only returns CAN_MTU bytes that have
+been received from the socket a legacy CAN frame has been read into the
+provided CAN FD structure. Note that the canfd_frame.flags data field is
+not specified in the struct can_frame and therefore it is only valid in
+CANFD_MTU sized CAN FD frames.
+
+Implementation hint for new CAN applications:
+
+To build a CAN FD aware application use struct canfd_frame as basic CAN
+data structure for CAN_RAW based applications. When the application is
+executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES
+socket option returns an error: No problem. You'll get legacy CAN frames
+or CAN FD frames and can process them the same way.
+
+When sending to CAN devices make sure that the device is capable to handle
+CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
+The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+
+
+RAW socket option CAN_RAW_JOIN_FILTERS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN_RAW socket can set multiple CAN identifier specific filters that
+lead to multiple filters in the af_can.c filter processing. These filters
+are indenpendent from each other which leads to logical OR'ed filters when
+applied (see :ref:`socketcan-rawfilter`).
+
+This socket option joines the given CAN filters in the way that only CAN
+frames are passed to user space that matched *all* given CAN filters. The
+semantic for the applied filters is therefore changed to a logical AND.
+
+This is useful especially when the filterset is a combination of filters
+where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
+CAN ID ranges from the incoming traffic.
+
+
+RAW Socket Returned Message Flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When using recvmsg() call, the msg->msg_flags may contain following flags:
+
+MSG_DONTROUTE:
+ set when the received frame was created on the local host.
+
+MSG_CONFIRM:
+ set when the frame was sent via the socket it is received on.
+ This flag can be interpreted as a 'transmission confirmation' when the
+ CAN driver supports the echo of frames on driver level, see
+ :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`.
+ In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
+
+
+Broadcast Manager Protocol Sockets (SOCK_DGRAM)
+-----------------------------------------------
+
+The Broadcast Manager protocol provides a command based configuration
+interface to filter and send (e.g. cyclic) CAN messages in kernel space.
+
+Receive filters can be used to down sample frequent messages; detect events
+such as message contents changes, packet length changes, and do time-out
+monitoring of received messages.
+
+Periodic transmission tasks of CAN frames or a sequence of CAN frames can be
+created and modified at runtime; both the message content and the two
+possible transmit intervals can be altered.
+
+A BCM socket is not intended for sending individual CAN frames using the
+struct can_frame as known from the CAN_RAW socket. Instead a special BCM
+configuration message is defined. The basic BCM configuration message used
+to communicate with the broadcast manager and the available operations are
+defined in the linux/can/bcm.h include. The BCM message consists of a
+message header with a command ('opcode') followed by zero or more CAN frames.
+The broadcast manager sends responses to user space in the same form:
+
+.. code-block:: C
+
+ struct bcm_msg_head {
+ __u32 opcode; /* command */
+ __u32 flags; /* special flags */
+ __u32 count; /* run 'count' times with ival1 */
+ struct timeval ival1, ival2; /* count and subsequent interval */
+ canid_t can_id; /* unique can_id for task */
+ __u32 nframes; /* number of can_frames following */
+ struct can_frame frames[0];
+ };
+
+The aligned payload 'frames' uses the same basic CAN frame structure defined
+at the beginning of :ref:`socketcan-rawfd` and in the include/linux/can.h include. All
+messages to the broadcast manager from user space have this structure.
+
+Note a CAN_BCM socket must be connected instead of bound after socket
+creation (example without error checking):
+
+.. code-block:: C
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ connect(s, (struct sockaddr *)&addr, sizeof(addr));
+
+ (..)
+
+The broadcast manager socket is able to handle any number of in flight
+transmissions or receive filters concurrently. The different RX/TX jobs are
+distinguished by the unique can_id in each BCM message. However additional
+CAN_BCM sockets are recommended to communicate on multiple CAN interfaces.
+When the broadcast manager socket is bound to 'any' CAN interface (=> the
+interface index is set to zero) the configured receive filters apply to any
+CAN interface unless the sendto() syscall is used to overrule the 'any' CAN
+interface index. When using recvfrom() instead of read() to retrieve BCM
+socket messages the originating CAN interface is provided in can_ifindex.
+
+
+Broadcast Manager Operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The opcode defines the operation for the broadcast manager to carry out,
+or details the broadcast managers response to several events, including
+user requests.
+
+Transmit Operations (user space to broadcast manager):
+
+TX_SETUP:
+ Create (cyclic) transmission task.
+
+TX_DELETE:
+ Remove (cyclic) transmission task, requires only can_id.
+
+TX_READ:
+ Read properties of (cyclic) transmission task for can_id.
+
+TX_SEND:
+ Send one CAN frame.
+
+Transmit Responses (broadcast manager to user space):
+
+TX_STATUS:
+ Reply to TX_READ request (transmission task configuration).
+
+TX_EXPIRED:
+ Notification when counter finishes sending at initial interval
+ 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP.
+
+Receive Operations (user space to broadcast manager):
+
+RX_SETUP:
+ Create RX content filter subscription.
+
+RX_DELETE:
+ Remove RX content filter subscription, requires only can_id.
+
+RX_READ:
+ Read properties of RX content filter subscription for can_id.
+
+Receive Responses (broadcast manager to user space):
+
+RX_STATUS:
+ Reply to RX_READ request (filter task configuration).
+
+RX_TIMEOUT:
+ Cyclic message is detected to be absent (timer ival1 expired).
+
+RX_CHANGED:
+ BCM message with updated CAN frame (detected content change).
+ Sent on first message received or on receipt of revised CAN messages.
+
+
+Broadcast Manager Message Flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When sending a message to the broadcast manager the 'flags' element may
+contain the following flag definitions which influence the behaviour:
+
+SETTIMER:
+ Set the values of ival1, ival2 and count
+
+STARTTIMER:
+ Start the timer with the actual values of ival1, ival2
+ and count. Starting the timer leads simultaneously to emit a CAN frame.
+
+TX_COUNTEVT:
+ Create the message TX_EXPIRED when count expires
+
+TX_ANNOUNCE:
+ A change of data by the process is emitted immediately.
+
+TX_CP_CAN_ID:
+ Copies the can_id from the message header to each
+ subsequent frame in frames. This is intended as usage simplification. For
+ TX tasks the unique can_id from the message header may differ from the
+ can_id(s) stored for transmission in the subsequent struct can_frame(s).
+
+RX_FILTER_ID:
+ Filter by can_id alone, no frames required (nframes=0).
+
+RX_CHECK_DLC:
+ A change of the DLC leads to an RX_CHANGED.
+
+RX_NO_AUTOTIMER:
+ Prevent automatically starting the timeout monitor.
+
+RX_ANNOUNCE_RESUME:
+ If passed at RX_SETUP and a receive timeout occurred, a
+ RX_CHANGED message will be generated when the (cyclic) receive restarts.
+
+TX_RESET_MULTI_IDX:
+ Reset the index for the multiple frame transmission.
+
+RX_RTR_FRAME:
+ Send reply for RTR-request (placed in op->frames[0]).
+
+
+Broadcast Manager Transmission Timers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Periodic transmission configurations may use up to two interval timers.
+In this case the BCM sends a number of messages ('count') at an interval
+'ival1', then continuing to send at another given interval 'ival2'. When
+only one timer is needed 'count' is set to zero and only 'ival2' is used.
+When SET_TIMER and START_TIMER flag were set the timers are activated.
+The timer values can be altered at runtime when only SET_TIMER is set.
+
+
+Broadcast Manager message sequence transmission
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic
+TX task configuration. The number of CAN frames is provided in the 'nframes'
+element of the BCM message head. The defined number of CAN frames are added
+as array to the TX_SETUP BCM configuration message:
+
+.. code-block:: C
+
+ /* create a struct to set up a sequence of four CAN frames */
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[4];
+ } mytxmsg;
+
+ (..)
+ mytxmsg.msg_head.nframes = 4;
+ (..)
+
+ write(s, &mytxmsg, sizeof(mytxmsg));
+
+With every transmission the index in the array of CAN frames is increased
+and set to zero at index overflow.
+
+
+Broadcast Manager Receive Filter Timers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP.
+When the SET_TIMER flag is set the timers are enabled:
+
+ival1:
+ Send RX_TIMEOUT when a received message is not received again within
+ the given time. When START_TIMER is set at RX_SETUP the timeout detection
+ is activated directly - even without a former CAN frame reception.
+
+ival2:
+ Throttle the received message rate down to the value of ival2. This
+ is useful to reduce messages for the application when the signal inside the
+ CAN frame is stateless as state changes within the ival2 periode may get
+ lost.
+
+Broadcast Manager Multiplex Message Receive Filter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To filter for content changes in multiplex message sequences an array of more
+than one CAN frames can be passed in a RX_SETUP configuration message. The
+data bytes of the first CAN frame contain the mask of relevant bits that
+have to match in the subsequent CAN frames with the received CAN frame.
+If one of the subsequent CAN frames is matching the bits in that frame data
+mark the relevant content to be compared with the previous received content.
+Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN
+filters) can be added as array to the TX_SETUP BCM configuration message:
+
+.. code-block:: C
+
+ /* usually used to clear CAN frame data[] - beware of endian problems! */
+ #define U64_DATA(p) (*(unsigned long long*)(p)->data)
+
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[5];
+ } msg;
+
+ msg.msg_head.opcode = RX_SETUP;
+ msg.msg_head.can_id = 0x42;
+ msg.msg_head.flags = 0;
+ msg.msg_head.nframes = 5;
+ U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */
+ U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */
+ U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */
+ U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */
+ U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */
+
+ write(s, &msg, sizeof(msg));
+
+
+Broadcast Manager CAN FD Support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The programming API of the CAN_BCM depends on struct can_frame which is
+given as array directly behind the bcm_msg_head structure. To follow this
+schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head
+flags indicates that the concatenated CAN frame structures behind the
+bcm_msg_head are defined as struct canfd_frame:
+
+.. code-block:: C
+
+ struct {
+ struct bcm_msg_head msg_head;
+ struct canfd_frame frame[5];
+ } msg;
+
+ msg.msg_head.opcode = RX_SETUP;
+ msg.msg_head.can_id = 0x42;
+ msg.msg_head.flags = CAN_FD_FRAME;
+ msg.msg_head.nframes = 5;
+ (..)
+
+When using CAN FD frames for multiplex filtering the MUX mask is still
+expected in the first 64 bit of the struct canfd_frame data section.
+
+
+Connected Transport Protocols (SOCK_SEQPACKET)
+----------------------------------------------
+
+(to be written)
+
+
+Unconnected Transport Protocols (SOCK_DGRAM)
+--------------------------------------------
+
+(to be written)
+
+
+.. _socketcan-core-module:
+
+SocketCAN Core Module
+=====================
+
+The SocketCAN core module implements the protocol family
+PF_CAN. CAN protocol modules are loaded by the core module at
+runtime. The core module provides an interface for CAN protocol
+modules to subscribe needed CAN IDs (see :ref:`socketcan-receive-lists`).
+
+
+can.ko Module Params
+--------------------
+
+- **stats_timer**:
+ To calculate the SocketCAN core statistics
+ (e.g. current/maximum frames per second) this 1 second timer is
+ invoked at can.ko module start time by default. This timer can be
+ disabled by using stattimer=0 on the module commandline.
+
+- **debug**:
+ (removed since SocketCAN SVN r546)
+
+
+procfs content
+--------------
+
+As described in :ref:`socketcan-receive-lists` the SocketCAN core uses several filter
+lists to deliver received CAN frames to CAN protocol modules. These
+receive lists, their filters and the count of filter matches can be
+checked in the appropriate receive list. All entries contain the
+device and a protocol module identifier::
+
+ foo@bar:~$ cat /proc/net/can/rcvlist_all
+
+ receive list 'rx_all':
+ (vcan3: no entry)
+ (vcan2: no entry)
+ (vcan1: no entry)
+ device can_id can_mask function userdata matches ident
+ vcan0 000 00000000 f88e6370 f6c6f400 0 raw
+ (any: no entry)
+
+In this example an application requests any CAN traffic from vcan0::
+
+ rcvlist_all - list for unfiltered entries (no filter operations)
+ rcvlist_eff - list for single extended frame (EFF) entries
+ rcvlist_err - list for error message frames masks
+ rcvlist_fil - list for mask/value filters
+ rcvlist_inv - list for mask/value filters (inverse semantic)
+ rcvlist_sff - list for single standard frame (SFF) entries
+
+Additional procfs files in /proc/net/can::
+
+ stats - SocketCAN core statistics (rx/tx frames, match ratios, ...)
+ reset_stats - manual statistic reset
+ version - prints the SocketCAN core version and the ABI version
+
+
+Writing Own CAN Protocol Modules
+--------------------------------
+
+To implement a new protocol in the protocol family PF_CAN a new
+protocol has to be defined in include/linux/can.h .
+The prototypes and definitions to use the SocketCAN core can be
+accessed by including include/linux/can/core.h .
+In addition to functions that register the CAN protocol and the
+CAN device notifier chain there are functions to subscribe CAN
+frames received by CAN interfaces and to send CAN frames::
+
+ can_rx_register - subscribe CAN frames from a specific interface
+ can_rx_unregister - unsubscribe CAN frames from a specific interface
+ can_send - transmit a CAN frame (optional with local loopback)
+
+For details see the kerneldoc documentation in net/can/af_can.c or
+the source code of net/can/raw.c or net/can/bcm.c .
+
+
+CAN Network Drivers
+===================
+
+Writing a CAN network device driver is much easier than writing a
+CAN character device driver. Similar to other known network device
+drivers you mainly have to deal with:
+
+- TX: Put the CAN frame from the socket buffer to the CAN controller.
+- RX: Put the CAN frame from the CAN controller to the socket buffer.
+
+See e.g. at Documentation/networking/netdevices.txt . The differences
+for writing CAN network device driver are described below:
+
+
+General Settings
+----------------
+
+.. code-block:: C
+
+ dev->type = ARPHRD_CAN; /* the netdevice hardware type */
+ dev->flags = IFF_NOARP; /* CAN has no arp */
+
+ dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */
+
+ or alternative, when the controller supports CAN with flexible data rate:
+ dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */
+
+The struct can_frame or struct canfd_frame is the payload of each socket
+buffer (skbuff) in the protocol family PF_CAN.
+
+
+.. _socketcan-local-loopback2:
+
+Local Loopback of Sent Frames
+-----------------------------
+
+As described in :ref:`socketcan-local-loopback1` the CAN network device driver should
+support a local loopback functionality similar to the local echo
+e.g. of tty devices. In this case the driver flag IFF_ECHO has to be
+set to prevent the PF_CAN core from locally echoing sent frames
+(aka loopback) as fallback solution::
+
+ dev->flags = (IFF_NOARP | IFF_ECHO);
+
+
+CAN Controller Hardware Filters
+-------------------------------
+
+To reduce the interrupt load on deep embedded systems some CAN
+controllers support the filtering of CAN IDs or ranges of CAN IDs.
+These hardware filter capabilities vary from controller to
+controller and have to be identified as not feasible in a multi-user
+networking approach. The use of the very controller specific
+hardware filters could make sense in a very dedicated use-case, as a
+filter on driver level would affect all users in the multi-user
+system. The high efficient filter sets inside the PF_CAN core allow
+to set different multiple filters for each socket separately.
+Therefore the use of hardware filters goes to the category 'handmade
+tuning on deep embedded systems'. The author is running a MPC603e
+@133MHz with four SJA1000 CAN controllers from 2002 under heavy bus
+load without any problems ...
+
+
+The Virtual CAN Driver (vcan)
+-----------------------------
+
+Similar to the network loopback devices, vcan offers a virtual local
+CAN interface. A full qualified address on CAN consists of
+
+- a unique CAN Identifier (CAN ID)
+- the CAN bus this CAN ID is transmitted on (e.g. can0)
+
+so in common use cases more than one virtual CAN interface is needed.
+
+The virtual CAN interfaces allow the transmission and reception of CAN
+frames without real CAN controller hardware. Virtual CAN network
+devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ...
+When compiled as a module the virtual CAN driver module is called vcan.ko
+
+Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel
+netlink interface to create vcan network devices. The creation and
+removal of vcan network devices can be managed with the ip(8) tool::
+
+ - Create a virtual CAN network interface:
+ $ ip link add type vcan
+
+ - Create a virtual CAN network interface with a specific name 'vcan42':
+ $ ip link add dev vcan42 type vcan
+
+ - Remove a (virtual CAN) network interface 'vcan42':
+ $ ip link del vcan42
+
+
+The CAN Network Device Driver Interface
+---------------------------------------
+
+The CAN network device driver interface provides a generic interface
+to setup, configure and monitor CAN network devices. The user can then
+configure the CAN device, like setting the bit-timing parameters, via
+the netlink interface using the program "ip" from the "IPROUTE2"
+utility suite. The following chapter describes briefly how to use it.
+Furthermore, the interface uses a common data structure and exports a
+set of common functions, which all real CAN network device drivers
+should use. Please have a look to the SJA1000 or MSCAN driver to
+understand how to use them. The name of the module is can-dev.ko.
+
+
+Netlink interface to set/get devices properties
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN device must be configured via netlink interface. The supported
+netlink message types are defined and briefly described in
+"include/linux/can/netlink.h". CAN link support for the program "ip"
+of the IPROUTE2 utility suite is available and it can be used as shown
+below:
+
+Setting CAN device properties::
+
+ $ ip link set can0 type can help
+ Usage: ip link set DEVICE type can
+ [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
+ [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
+ phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
+
+ [ dbitrate BITRATE [ dsample-point SAMPLE-POINT] ] |
+ [ dtq TQ dprop-seg PROP_SEG dphase-seg1 PHASE-SEG1
+ dphase-seg2 PHASE-SEG2 [ dsjw SJW ] ]
+
+ [ loopback { on | off } ]
+ [ listen-only { on | off } ]
+ [ triple-sampling { on | off } ]
+ [ one-shot { on | off } ]
+ [ berr-reporting { on | off } ]
+ [ fd { on | off } ]
+ [ fd-non-iso { on | off } ]
+ [ presume-ack { on | off } ]
+
+ [ restart-ms TIME-MS ]
+ [ restart ]
+
+ Where: BITRATE := { 1..1000000 }
+ SAMPLE-POINT := { 0.000..0.999 }
+ TQ := { NUMBER }
+ PROP-SEG := { 1..8 }
+ PHASE-SEG1 := { 1..8 }
+ PHASE-SEG2 := { 1..8 }
+ SJW := { 1..4 }
+ RESTART-MS := { 0 | NUMBER }
+
+Display CAN device details and statistics::
+
+ $ ip -details -statistics link show can0
+ 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
+ link/can
+ can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
+ bitrate 125000 sample_point 0.875
+ tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
+ sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+ clock 8000000
+ re-started bus-errors arbit-lost error-warn error-pass bus-off
+ 41 17457 0 41 42 41
+ RX: bytes packets errors dropped overrun mcast
+ 140859 17608 17457 0 0 0
+ TX: bytes packets errors dropped carrier collsns
+ 861 112 0 41 0 0
+
+More info to the above output:
+
+"<TRIPLE-SAMPLING>"
+ Shows the list of selected CAN controller modes: LOOPBACK,
+ LISTEN-ONLY, or TRIPLE-SAMPLING.
+
+"state ERROR-ACTIVE"
+ The current state of the CAN controller: "ERROR-ACTIVE",
+ "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
+
+"restart-ms 100"
+ Automatic restart delay time. If set to a non-zero value, a
+ restart of the CAN controller will be triggered automatically
+ in case of a bus-off condition after the specified delay time
+ in milliseconds. By default it's off.
+
+"bitrate 125000 sample-point 0.875"
+ Shows the real bit-rate in bits/sec and the sample-point in the
+ range 0.000..0.999. If the calculation of bit-timing parameters
+ is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
+ bit-timing can be defined by setting the "bitrate" argument.
+ Optionally the "sample-point" can be specified. By default it's
+ 0.000 assuming CIA-recommended sample-points.
+
+"tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
+ Shows the time quanta in ns, propagation segment, phase buffer
+ segment 1 and 2 and the synchronisation jump width in units of
+ tq. They allow to define the CAN bit-timing in a hardware
+ independent format as proposed by the Bosch CAN 2.0 spec (see
+ chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
+
+"sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 clock 8000000"
+ Shows the bit-timing constants of the CAN controller, here the
+ "sja1000". The minimum and maximum values of the time segment 1
+ and 2, the synchronisation jump width in units of tq, the
+ bitrate pre-scaler and the CAN system clock frequency in Hz.
+ These constants could be used for user-defined (non-standard)
+ bit-timing calculation algorithms in user-space.
+
+"re-started bus-errors arbit-lost error-warn error-pass bus-off"
+ Shows the number of restarts, bus and arbitration lost errors,
+ and the state changes to the error-warning, error-passive and
+ bus-off state. RX overrun errors are listed in the "overrun"
+ field of the standard network statistics.
+
+Setting the CAN Bit-Timing
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN bit-timing parameters can always be defined in a hardware
+independent format as proposed in the Bosch CAN 2.0 specification
+specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
+and "sjw"::
+
+ $ ip link set canX type can tq 125 prop-seg 6 \
+ phase-seg1 7 phase-seg2 2 sjw 1
+
+If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
+recommended CAN bit-timing parameters will be calculated if the bit-
+rate is specified with the argument "bitrate"::
+
+ $ ip link set canX type can bitrate 125000
+
+Note that this works fine for the most common CAN controllers with
+standard bit-rates but may *fail* for exotic bit-rates or CAN system
+clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
+space and allows user-space tools to solely determine and set the
+bit-timing parameters. The CAN controller specific bit-timing
+constants can be used for that purpose. They are listed by the
+following command::
+
+ $ ip -details link show can0
+ ...
+ sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+
+
+Starting and Stopping the CAN Network Device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A CAN network device is started or stopped as usual with the command
+"ifconfig canX up/down" or "ip link set canX up/down". Be aware that
+you *must* define proper bit-timing parameters for real CAN devices
+before you can start it to avoid error-prone default settings::
+
+ $ ip link set canX up type can bitrate 125000
+
+A device may enter the "bus-off" state if too many errors occurred on
+the CAN bus. Then no more messages are received or sent. An automatic
+bus-off recovery can be enabled by setting the "restart-ms" to a
+non-zero value, e.g.::
+
+ $ ip link set canX type can restart-ms 100
+
+Alternatively, the application may realize the "bus-off" condition
+by monitoring CAN error message frames and do a restart when
+appropriate with the command::
+
+ $ ip link set canX type can restart
+
+Note that a restart will also create a CAN error message frame (see
+also :ref:`socketcan-network-problem-notifications`).
+
+
+.. _socketcan-can-fd-driver:
+
+CAN FD (Flexible Data Rate) Driver Support
+------------------------------------------
+
+CAN FD capable CAN controllers support two different bitrates for the
+arbitration phase and the payload phase of the CAN FD frame. Therefore a
+second bit timing has to be specified in order to enable the CAN FD bitrate.
+
+Additionally CAN FD capable CAN controllers support up to 64 bytes of
+payload. The representation of this length in can_frame.can_dlc and
+canfd_frame.len for userspace applications and inside the Linux network
+layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
+The data length code was a 1:1 mapping to the payload length in the legacy
+CAN frames anyway. The payload length to the bus-relevant DLC mapping is
+only performed inside the CAN drivers, preferably with the helper
+functions can_dlc2len() and can_len2dlc().
+
+The CAN netdevice driver capabilities can be distinguished by the network
+devices maximum transfer unit (MTU)::
+
+ MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device
+ MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device
+
+The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+N.B. CAN FD capable devices can also handle and send legacy CAN frames.
+
+When configuring CAN FD capable CAN controllers an additional 'data' bitrate
+has to be set. This bitrate for the data phase of the CAN FD frame has to be
+at least the bitrate which was configured for the arbitration phase. This
+second bitrate is specified analogue to the first bitrate but the bitrate
+setting keywords for the 'data' bitrate start with 'd' e.g. dbitrate,
+dsample-point, dsjw or dtq and similar settings. When a data bitrate is set
+within the configuration process the controller option "fd on" can be
+specified to enable the CAN FD mode in the CAN controller. This controller
+option also switches the device MTU to 72 (CANFD_MTU).
+
+The first CAN FD specification presented as whitepaper at the International
+CAN Conference 2012 needed to be improved for data integrity reasons.
+Therefore two CAN FD implementations have to be distinguished today:
+
+- ISO compliant: The ISO 11898-1:2015 CAN FD implementation (default)
+- non-ISO compliant: The CAN FD implementation following the 2012 whitepaper
+
+Finally there are three types of CAN FD controllers:
+
+1. ISO compliant (fixed)
+2. non-ISO compliant (fixed, like the M_CAN IP core v3.0.1 in m_can.c)
+3. ISO/non-ISO CAN FD controllers (switchable, like the PEAK PCAN-USB FD)
+
+The current ISO/non-ISO mode is announced by the CAN controller driver via
+netlink and displayed by the 'ip' tool (controller option FD-NON-ISO).
+The ISO/non-ISO-mode can be altered by setting 'fd-non-iso {on|off}' for
+switchable CAN FD controllers only.
+
+Example configuring 500 kbit/s arbitration bitrate and 4 Mbit/s data bitrate::
+
+ $ ip link set can0 up type can bitrate 500000 sample-point 0.75 \
+ dbitrate 4000000 dsample-point 0.8 fd on
+ $ ip -details link show can0
+ 5: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 72 qdisc pfifo_fast state UNKNOWN \
+ mode DEFAULT group default qlen 10
+ link/can promiscuity 0
+ can <FD> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
+ bitrate 500000 sample-point 0.750
+ tq 50 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 1
+ pcan_usb_pro_fd: tseg1 1..64 tseg2 1..16 sjw 1..16 brp 1..1024 \
+ brp-inc 1
+ dbitrate 4000000 dsample-point 0.800
+ dtq 12 dprop-seg 7 dphase-seg1 8 dphase-seg2 4 dsjw 1
+ pcan_usb_pro_fd: dtseg1 1..16 dtseg2 1..8 dsjw 1..4 dbrp 1..1024 \
+ dbrp-inc 1
+ clock 80000000
+
+Example when 'fd-non-iso on' is added on this switchable CAN FD adapter::
+
+ can <FD,FD-NON-ISO> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
+
+
+Supported CAN Hardware
+----------------------
+
+Please check the "Kconfig" file in "drivers/net/can" to get an actual
+list of the support CAN hardware. On the SocketCAN project website
+(see :ref:`socketcan-resources`) there might be further drivers available, also for
+older kernel versions.
+
+
+.. _socketcan-resources:
+
+SocketCAN Resources
+===================
+
+The Linux CAN / SocketCAN project resources (project site / mailing list)
+are referenced in the MAINTAINERS file in the Linux source tree.
+Search for CAN NETWORK [LAYERS|DRIVERS].
+
+Credits
+=======
+
+- Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
+- Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
+- Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
+- Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews, CAN device driver interface, MSCAN driver)
+- Robert Schwebel (design reviews, PTXdist integration)
+- Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
+- Benedikt Spranger (reviews)
+- Thomas Gleixner (LKML reviews, coding style, posting hints)
+- Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
+- Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
+- Klaus Hitschler (PEAK driver integration)
+- Uwe Koppe (CAN netdevices with PF_PACKET approach)
+- Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
+- Pavel Pisa (Bit-timing calculation)
+- Sascha Hauer (SJA1000 platform driver)
+- Sebastian Haas (SJA1000 EMS PCI driver)
+- Markus Plessing (SJA1000 EMS PCI driver)
+- Per Dalen (SJA1000 Kvaser PCI driver)
+- Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
deleted file mode 100644
index aa15b9e..0000000
--- a/Documentation/networking/can.txt
+++ /dev/null
@@ -1,1308 +0,0 @@
-============================================================================
-
-can.txt
-
-Readme file for the Controller Area Network Protocol Family (aka SocketCAN)
-
-This file contains
-
- 1 Overview / What is SocketCAN
-
- 2 Motivation / Why using the socket API
-
- 3 SocketCAN concept
- 3.1 receive lists
- 3.2 local loopback of sent frames
- 3.3 network problem notifications
-
- 4 How to use SocketCAN
- 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
- 4.1.1 RAW socket option CAN_RAW_FILTER
- 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
- 4.1.3 RAW socket option CAN_RAW_LOOPBACK
- 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
- 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
- 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
- 4.1.7 RAW socket returned message flags
- 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
- 4.2.1 Broadcast Manager operations
- 4.2.2 Broadcast Manager message flags
- 4.2.3 Broadcast Manager transmission timers
- 4.2.4 Broadcast Manager message sequence transmission
- 4.2.5 Broadcast Manager receive filter timers
- 4.2.6 Broadcast Manager multiplex message receive filter
- 4.2.7 Broadcast Manager CAN FD support
- 4.3 connected transport protocols (SOCK_SEQPACKET)
- 4.4 unconnected transport protocols (SOCK_DGRAM)
-
- 5 SocketCAN core module
- 5.1 can.ko module params
- 5.2 procfs content
- 5.3 writing own CAN protocol modules
-
- 6 CAN network drivers
- 6.1 general settings
- 6.2 local loopback of sent frames
- 6.3 CAN controller hardware filters
- 6.4 The virtual CAN driver (vcan)
- 6.5 The CAN network device driver interface
- 6.5.1 Netlink interface to set/get devices properties
- 6.5.2 Setting the CAN bit-timing
- 6.5.3 Starting and stopping the CAN network device
- 6.6 CAN FD (flexible data rate) driver support
- 6.7 supported CAN hardware
-
- 7 SocketCAN resources
-
- 8 Credits
-
-============================================================================
-
-1. Overview / What is SocketCAN
---------------------------------
-
-The socketcan package is an implementation of CAN protocols
-(Controller Area Network) for Linux. CAN is a networking technology
-which has widespread use in automation, embedded devices, and
-automotive fields. While there have been other CAN implementations
-for Linux based on character devices, SocketCAN uses the Berkeley
-socket API, the Linux network stack and implements the CAN device
-drivers as network interfaces. The CAN socket API has been designed
-as similar as possible to the TCP/IP protocols to allow programmers,
-familiar with network programming, to easily learn how to use CAN
-sockets.
-
-2. Motivation / Why using the socket API
-----------------------------------------
-
-There have been CAN implementations for Linux before SocketCAN so the
-question arises, why we have started another project. Most existing
-implementations come as a device driver for some CAN hardware, they
-are based on character devices and provide comparatively little
-functionality. Usually, there is only a hardware-specific device
-driver which provides a character device interface to send and
-receive raw CAN frames, directly to/from the controller hardware.
-Queueing of frames and higher-level transport protocols like ISO-TP
-have to be implemented in user space applications. Also, most
-character-device implementations support only one single process to
-open the device at a time, similar to a serial interface. Exchanging
-the CAN controller requires employment of another device driver and
-often the need for adaption of large parts of the application to the
-new driver's API.
-
-SocketCAN was designed to overcome all of these limitations. A new
-protocol family has been implemented which provides a socket interface
-to user space applications and which builds upon the Linux network
-layer, enabling use all of the provided queueing functionality. A device
-driver for CAN controller hardware registers itself with the Linux
-network layer as a network device, so that CAN frames from the
-controller can be passed up to the network layer and on to the CAN
-protocol family module and also vice-versa. Also, the protocol family
-module provides an API for transport protocol modules to register, so
-that any number of transport protocols can be loaded or unloaded
-dynamically. In fact, the can core module alone does not provide any
-protocol and cannot be used without loading at least one additional
-protocol module. Multiple sockets can be opened at the same time,
-on different or the same protocol module and they can listen/send
-frames on different or the same CAN IDs. Several sockets listening on
-the same interface for frames with the same CAN ID are all passed the
-same received matching CAN frames. An application wishing to
-communicate using a specific transport protocol, e.g. ISO-TP, just
-selects that protocol when opening the socket, and then can read and
-write application data byte streams, without having to deal with
-CAN-IDs, frames, etc.
-
-Similar functionality visible from user-space could be provided by a
-character device, too, but this would lead to a technically inelegant
-solution for a couple of reasons:
-
-* Intricate usage. Instead of passing a protocol argument to
- socket(2) and using bind(2) to select a CAN interface and CAN ID, an
- application would have to do all these operations using ioctl(2)s.
-
-* Code duplication. A character device cannot make use of the Linux
- network queueing code, so all that code would have to be duplicated
- for CAN networking.
-
-* Abstraction. In most existing character-device implementations, the
- hardware-specific device driver for a CAN controller directly
- provides the character device for the application to work with.
- This is at least very unusual in Unix systems for both, char and
- block devices. For example you don't have a character device for a
- certain UART of a serial interface, a certain sound chip in your
- computer, a SCSI or IDE controller providing access to your hard
- disk or tape streamer device. Instead, you have abstraction layers
- which provide a unified character or block device interface to the
- application on the one hand, and a interface for hardware-specific
- device drivers on the other hand. These abstractions are provided
- by subsystems like the tty layer, the audio subsystem or the SCSI
- and IDE subsystems for the devices mentioned above.
-
- The easiest way to implement a CAN device driver is as a character
- device without such a (complete) abstraction layer, as is done by most
- existing drivers. The right way, however, would be to add such a
- layer with all the functionality like registering for certain CAN
- IDs, supporting several open file descriptors and (de)multiplexing
- CAN frames between them, (sophisticated) queueing of CAN frames, and
- providing an API for device drivers to register with. However, then
- it would be no more difficult, or may be even easier, to use the
- networking framework provided by the Linux kernel, and this is what
- SocketCAN does.
-
- The use of the networking framework of the Linux kernel is just the
- natural and most appropriate way to implement CAN for Linux.
-
-3. SocketCAN concept
----------------------
-
- As described in chapter 2 it is the main goal of SocketCAN to
- provide a socket interface to user space applications which builds
- upon the Linux network layer. In contrast to the commonly known
- TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!)
- medium that has no MAC-layer addressing like ethernet. The CAN-identifier
- (can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs
- have to be chosen uniquely on the bus. When designing a CAN-ECU
- network the CAN-IDs are mapped to be sent by a specific ECU.
- For this reason a CAN-ID can be treated best as a kind of source address.
-
- 3.1 receive lists
-
- The network transparent access of multiple applications leads to the
- problem that different applications may be interested in the same
- CAN-IDs from the same CAN network interface. The SocketCAN core
- module - which implements the protocol family CAN - provides several
- high efficient receive lists for this reason. If e.g. a user space
- application opens a CAN RAW socket, the raw protocol module itself
- requests the (range of) CAN-IDs from the SocketCAN core that are
- requested by the user. The subscription and unsubscription of
- CAN-IDs can be done for specific CAN interfaces or for all(!) known
- CAN interfaces with the can_rx_(un)register() functions provided to
- CAN protocol modules by the SocketCAN core (see chapter 5).
- To optimize the CPU usage at runtime the receive lists are split up
- into several specific lists per device that match the requested
- filter complexity for a given use-case.
-
- 3.2 local loopback of sent frames
-
- As known from other networking concepts the data exchanging
- applications may run on the same or different nodes without any
- change (except for the according addressing information):
-
- ___ ___ ___ _______ ___
- | _ | | _ | | _ | | _ _ | | _ |
- ||A|| ||B|| ||C|| ||A| |B|| ||C||
- |___| |___| |___| |_______| |___|
- | | | | |
- -----------------(1)- CAN bus -(2)---------------
-
- To ensure that application A receives the same information in the
- example (2) as it would receive in example (1) there is need for
- some kind of local loopback of the sent CAN frames on the appropriate
- node.
-
- The Linux network devices (by default) just can handle the
- transmission and reception of media dependent frames. Due to the
- arbitration on the CAN bus the transmission of a low prio CAN-ID
- may be delayed by the reception of a high prio CAN frame. To
- reflect the correct* traffic on the node the loopback of the sent
- data has to be performed right after a successful transmission. If
- the CAN network interface is not capable of performing the loopback for
- some reason the SocketCAN core can do this task as a fallback solution.
- See chapter 6.2 for details (recommended).
-
- The loopback functionality is enabled by default to reflect standard
- networking behaviour for CAN applications. Due to some requests from
- the RT-SocketCAN group the loopback optionally may be disabled for each
- separate socket. See sockopts from the CAN RAW sockets in chapter 4.1.
-
- * = you really like to have this when you're running analyser tools
- like 'candump' or 'cansniffer' on the (same) node.
-
- 3.3 network problem notifications
-
- The use of the CAN bus may lead to several problems on the physical
- and media access control layer. Detecting and logging of these lower
- layer problems is a vital requirement for CAN users to identify
- hardware issues on the physical transceiver layer as well as
- arbitration problems and error frames caused by the different
- ECUs. The occurrence of detected errors are important for diagnosis
- and have to be logged together with the exact timestamp. For this
- reason the CAN interface driver can generate so called Error Message
- Frames that can optionally be passed to the user application in the
- same way as other CAN frames. Whenever an error on the physical layer
- or the MAC layer is detected (e.g. by the CAN controller) the driver
- creates an appropriate error message frame. Error messages frames can
- be requested by the user application using the common CAN filter
- mechanisms. Inside this filter definition the (interested) type of
- errors may be selected. The reception of error messages is disabled
- by default. The format of the CAN error message frame is briefly
- described in the Linux header file "include/uapi/linux/can/error.h".
-
-4. How to use SocketCAN
-------------------------
-
- Like TCP/IP, you first need to open a socket for communicating over a
- CAN network. Since SocketCAN implements a new protocol family, you
- need to pass PF_CAN as the first argument to the socket(2) system
- call. Currently, there are two CAN protocols to choose from, the raw
- socket protocol and the broadcast manager (BCM). So to open a socket,
- you would write
-
- s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
-
- and
-
- s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
-
- respectively. After the successful creation of the socket, you would
- normally use the bind(2) system call to bind the socket to a CAN
- interface (which is different from TCP/IP due to different addressing
- - see chapter 3). After binding (CAN_RAW) or connecting (CAN_BCM)
- the socket, you can read(2) and write(2) from/to the socket or use
- send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
- on the socket as usual. There are also CAN specific socket options
- described below.
-
- The basic CAN frame structure and the sockaddr structure are defined
- in include/linux/can.h:
-
- struct can_frame {
- canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
- __u8 can_dlc; /* frame payload length in byte (0 .. 8) */
- __u8 __pad; /* padding */
- __u8 __res0; /* reserved / padding */
- __u8 __res1; /* reserved / padding */
- __u8 data[8] __attribute__((aligned(8)));
- };
-
- The alignment of the (linear) payload data[] to a 64bit boundary
- allows the user to define their own structs and unions to easily access
- the CAN payload. There is no given byteorder on the CAN bus by
- default. A read(2) system call on a CAN_RAW socket transfers a
- struct can_frame to the user space.
-
- The sockaddr_can structure has an interface index like the
- PF_PACKET socket, that also binds to a specific interface:
-
- struct sockaddr_can {
- sa_family_t can_family;
- int can_ifindex;
- union {
- /* transport protocol class address info (e.g. ISOTP) */
- struct { canid_t rx_id, tx_id; } tp;
-
- /* reserved for future CAN protocols address information */
- } can_addr;
- };
-
- To determine the interface index an appropriate ioctl() has to
- be used (example for CAN_RAW sockets without error checking):
-
- int s;
- struct sockaddr_can addr;
- struct ifreq ifr;
-
- s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
-
- strcpy(ifr.ifr_name, "can0" );
- ioctl(s, SIOCGIFINDEX, &ifr);
-
- addr.can_family = AF_CAN;
- addr.can_ifindex = ifr.ifr_ifindex;
-
- bind(s, (struct sockaddr *)&addr, sizeof(addr));
-
- (..)
-
- To bind a socket to all(!) CAN interfaces the interface index must
- be 0 (zero). In this case the socket receives CAN frames from every
- enabled CAN interface. To determine the originating CAN interface
- the system call recvfrom(2) may be used instead of read(2). To send
- on a socket that is bound to 'any' interface sendto(2) is needed to
- specify the outgoing interface.
-
- Reading CAN frames from a bound CAN_RAW socket (see above) consists
- of reading a struct can_frame:
-
- struct can_frame frame;
-
- nbytes = read(s, &frame, sizeof(struct can_frame));
-
- if (nbytes < 0) {
- perror("can raw socket read");
- return 1;
- }
-
- /* paranoid check ... */
- if (nbytes < sizeof(struct can_frame)) {
- fprintf(stderr, "read: incomplete CAN frame\n");
- return 1;
- }
-
- /* do something with the received CAN frame */
-
- Writing CAN frames can be done similarly, with the write(2) system call:
-
- nbytes = write(s, &frame, sizeof(struct can_frame));
-
- When the CAN interface is bound to 'any' existing CAN interface
- (addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the
- information about the originating CAN interface is needed:
-
- struct sockaddr_can addr;
- struct ifreq ifr;
- socklen_t len = sizeof(addr);
- struct can_frame frame;
-
- nbytes = recvfrom(s, &frame, sizeof(struct can_frame),
- 0, (struct sockaddr*)&addr, &len);
-
- /* get interface name of the received CAN frame */
- ifr.ifr_ifindex = addr.can_ifindex;
- ioctl(s, SIOCGIFNAME, &ifr);
- printf("Received a CAN frame from interface %s", ifr.ifr_name);
-
- To write CAN frames on sockets bound to 'any' CAN interface the
- outgoing interface has to be defined certainly.
-
- strcpy(ifr.ifr_name, "can0");
- ioctl(s, SIOCGIFINDEX, &ifr);
- addr.can_ifindex = ifr.ifr_ifindex;
- addr.can_family = AF_CAN;
-
- nbytes = sendto(s, &frame, sizeof(struct can_frame),
- 0, (struct sockaddr*)&addr, sizeof(addr));
-
- An accurate timestamp can be obtained with an ioctl(2) call after reading
- a message from the socket:
-
- struct timeval tv;
- ioctl(s, SIOCGSTAMP, &tv);
-
- The timestamp has a resolution of one microsecond and is set automatically
- at the reception of a CAN frame.
-
- Remark about CAN FD (flexible data rate) support:
-
- Generally the handling of CAN FD is very similar to the formerly described
- examples. The new CAN FD capable CAN controllers support two different
- bitrates for the arbitration phase and the payload phase of the CAN FD frame
- and up to 64 bytes of payload. This extended payload length breaks all the
- kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
- bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
- the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
- switches the socket into a mode that allows the handling of CAN FD frames
- and (legacy) CAN frames simultaneously (see section 4.1.5).
-
- The struct canfd_frame is defined in include/linux/can.h:
-
- struct canfd_frame {
- canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
- __u8 len; /* frame payload length in byte (0 .. 64) */
- __u8 flags; /* additional flags for CAN FD */
- __u8 __res0; /* reserved / padding */
- __u8 __res1; /* reserved / padding */
- __u8 data[64] __attribute__((aligned(8)));
- };
-
- The struct canfd_frame and the existing struct can_frame have the can_id,
- the payload length and the payload data at the same offset inside their
- structures. This allows to handle the different structures very similar.
- When the content of a struct can_frame is copied into a struct canfd_frame
- all structure elements can be used as-is - only the data[] becomes extended.
-
- When introducing the struct canfd_frame it turned out that the data length
- code (DLC) of the struct can_frame was used as a length information as the
- length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
- the easy handling of the length information the canfd_frame.len element
- contains a plain length value from 0 .. 64. So both canfd_frame.len and
- can_frame.can_dlc are equal and contain a length information and no DLC.
- For details about the distinction of CAN and CAN FD capable devices and
- the mapping to the bus-relevant data length code (DLC), see chapter 6.6.
-
- The length of the two CAN(FD) frame structures define the maximum transfer
- unit (MTU) of the CAN(FD) network interface and skbuff data length. Two
- definitions are specified for CAN specific MTUs in include/linux/can.h :
-
- #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame
- #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
-
- 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
-
- Using CAN_RAW sockets is extensively comparable to the commonly
- known access to CAN character devices. To meet the new possibilities
- provided by the multi user SocketCAN approach, some reasonable
- defaults are set at RAW socket binding time:
-
- - The filters are set to exactly one filter receiving everything
- - The socket only receives valid data frames (=> no error message frames)
- - The loopback of sent CAN frames is enabled (see chapter 3.2)
- - The socket does not receive its own sent frames (in loopback mode)
-
- These default settings may be changed before or after binding the socket.
- To use the referenced definitions of the socket options for CAN_RAW
- sockets, include <linux/can/raw.h>.
-
- 4.1.1 RAW socket option CAN_RAW_FILTER
-
- The reception of CAN frames using CAN_RAW sockets can be controlled
- by defining 0 .. n filters with the CAN_RAW_FILTER socket option.
-
- The CAN filter structure is defined in include/linux/can.h:
-
- struct can_filter {
- canid_t can_id;
- canid_t can_mask;
- };
-
- A filter matches, when
-
- <received_can_id> & mask == can_id & mask
-
- which is analogous to known CAN controllers hardware filter semantics.
- The filter can be inverted in this semantic, when the CAN_INV_FILTER
- bit is set in can_id element of the can_filter structure. In
- contrast to CAN controller hardware filters the user may set 0 .. n
- receive filters for each open socket separately:
-
- struct can_filter rfilter[2];
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = CAN_SFF_MASK;
- rfilter[1].can_id = 0x200;
- rfilter[1].can_mask = 0x700;
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
-
- To disable the reception of CAN frames on the selected CAN_RAW socket:
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
-
- To set the filters to zero filters is quite obsolete as to not read
- data causes the raw socket to discard the received CAN frames. But
- having this 'send only' use-case we may remove the receive list in the
- Kernel to save a little (really a very little!) CPU usage.
-
- 4.1.1.1 CAN filter usage optimisation
-
- The CAN filters are processed in per-device filter lists at CAN frame
- reception time. To reduce the number of checks that need to be performed
- while walking through the filter lists the CAN core provides an optimized
- filter handling when the filter subscription focusses on a single CAN ID.
-
- For the possible 2048 SFF CAN identifiers the identifier is used as an index
- to access the corresponding subscription list without any further checks.
- For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
- hash function to retrieve the EFF table index.
-
- To benefit from the optimized filters for single CAN identifiers the
- CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
- with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
- can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
- subscribed. E.g. in the example from above
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = CAN_SFF_MASK;
-
- both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
-
- To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
- filter has to be defined in this way to benefit from the optimized filters:
-
- struct can_filter rfilter[2];
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
- rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG;
- rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
-
- 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
-
- As described in chapter 3.3 the CAN interface driver can generate so
- called Error Message Frames that can optionally be passed to the user
- application in the same way as other CAN frames. The possible
- errors are divided into different error classes that may be filtered
- using the appropriate error mask. To register for every possible
- error condition CAN_ERR_MASK can be used as value for the error mask.
- The values for the error mask are defined in linux/can/error.h .
-
- can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF );
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
- &err_mask, sizeof(err_mask));
-
- 4.1.3 RAW socket option CAN_RAW_LOOPBACK
-
- To meet multi user needs the local loopback is enabled by default
- (see chapter 3.2 for details). But in some embedded use-cases
- (e.g. when only one application uses the CAN bus) this loopback
- functionality can be disabled (separately for each socket):
-
- int loopback = 0; /* 0 = disabled, 1 = enabled (default) */
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));
-
- 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
-
- When the local loopback is enabled, all the sent CAN frames are
- looped back to the open CAN sockets that registered for the CAN
- frames' CAN-ID on this given interface to meet the multi user
- needs. The reception of the CAN frames on the same socket that was
- sending the CAN frame is assumed to be unwanted and therefore
- disabled by default. This default behaviour may be changed on
- demand:
-
- int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
- &recv_own_msgs, sizeof(recv_own_msgs));
-
- 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
-
- CAN FD support in CAN_RAW sockets can be enabled with a new socket option
- CAN_RAW_FD_FRAMES which is off by default. When the new socket option is
- not supported by the CAN_RAW socket (e.g. on older kernels), switching the
- CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT.
-
- Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames
- and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames
- when reading from the socket.
-
- CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed
- CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default)
-
- Example:
- [ remember: CANFD_MTU == sizeof(struct canfd_frame) ]
-
- struct canfd_frame cfd;
-
- nbytes = read(s, &cfd, CANFD_MTU);
-
- if (nbytes == CANFD_MTU) {
- printf("got CAN FD frame with length %d\n", cfd.len);
- /* cfd.flags contains valid data */
- } else if (nbytes == CAN_MTU) {
- printf("got legacy CAN frame with length %d\n", cfd.len);
- /* cfd.flags is undefined */
- } else {
- fprintf(stderr, "read: invalid CAN(FD) frame\n");
- return 1;
- }
-
- /* the content can be handled independently from the received MTU size */
-
- printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len);
- for (i = 0; i < cfd.len; i++)
- printf("%02X ", cfd.data[i]);
-
- When reading with size CANFD_MTU only returns CAN_MTU bytes that have
- been received from the socket a legacy CAN frame has been read into the
- provided CAN FD structure. Note that the canfd_frame.flags data field is
- not specified in the struct can_frame and therefore it is only valid in
- CANFD_MTU sized CAN FD frames.
-
- Implementation hint for new CAN applications:
-
- To build a CAN FD aware application use struct canfd_frame as basic CAN
- data structure for CAN_RAW based applications. When the application is
- executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES
- socket option returns an error: No problem. You'll get legacy CAN frames
- or CAN FD frames and can process them the same way.
-
- When sending to CAN devices make sure that the device is capable to handle
- CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
- The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
-
- 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
-
- The CAN_RAW socket can set multiple CAN identifier specific filters that
- lead to multiple filters in the af_can.c filter processing. These filters
- are indenpendent from each other which leads to logical OR'ed filters when
- applied (see 4.1.1).
-
- This socket option joines the given CAN filters in the way that only CAN
- frames are passed to user space that matched *all* given CAN filters. The
- semantic for the applied filters is therefore changed to a logical AND.
-
- This is useful especially when the filterset is a combination of filters
- where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
- CAN ID ranges from the incoming traffic.
-
- 4.1.7 RAW socket returned message flags
-
- When using recvmsg() call, the msg->msg_flags may contain following flags:
-
- MSG_DONTROUTE: set when the received frame was created on the local host.
-
- MSG_CONFIRM: set when the frame was sent via the socket it is received on.
- This flag can be interpreted as a 'transmission confirmation' when the
- CAN driver supports the echo of frames on driver level, see 3.2 and 6.2.
- In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
-
- 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
-
- The Broadcast Manager protocol provides a command based configuration
- interface to filter and send (e.g. cyclic) CAN messages in kernel space.
-
- Receive filters can be used to down sample frequent messages; detect events
- such as message contents changes, packet length changes, and do time-out
- monitoring of received messages.
-
- Periodic transmission tasks of CAN frames or a sequence of CAN frames can be
- created and modified at runtime; both the message content and the two
- possible transmit intervals can be altered.
-
- A BCM socket is not intended for sending individual CAN frames using the
- struct can_frame as known from the CAN_RAW socket. Instead a special BCM
- configuration message is defined. The basic BCM configuration message used
- to communicate with the broadcast manager and the available operations are
- defined in the linux/can/bcm.h include. The BCM message consists of a
- message header with a command ('opcode') followed by zero or more CAN frames.
- The broadcast manager sends responses to user space in the same form:
-
- struct bcm_msg_head {
- __u32 opcode; /* command */
- __u32 flags; /* special flags */
- __u32 count; /* run 'count' times with ival1 */
- struct timeval ival1, ival2; /* count and subsequent interval */
- canid_t can_id; /* unique can_id for task */
- __u32 nframes; /* number of can_frames following */
- struct can_frame frames[0];
- };
-
- The aligned payload 'frames' uses the same basic CAN frame structure defined
- at the beginning of section 4 and in the include/linux/can.h include. All
- messages to the broadcast manager from user space have this structure.
-
- Note a CAN_BCM socket must be connected instead of bound after socket
- creation (example without error checking):
-
- int s;
- struct sockaddr_can addr;
- struct ifreq ifr;
-
- s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
-
- strcpy(ifr.ifr_name, "can0");
- ioctl(s, SIOCGIFINDEX, &ifr);
-
- addr.can_family = AF_CAN;
- addr.can_ifindex = ifr.ifr_ifindex;
-
- connect(s, (struct sockaddr *)&addr, sizeof(addr));
-
- (..)
-
- The broadcast manager socket is able to handle any number of in flight
- transmissions or receive filters concurrently. The different RX/TX jobs are
- distinguished by the unique can_id in each BCM message. However additional
- CAN_BCM sockets are recommended to communicate on multiple CAN interfaces.
- When the broadcast manager socket is bound to 'any' CAN interface (=> the
- interface index is set to zero) the configured receive filters apply to any
- CAN interface unless the sendto() syscall is used to overrule the 'any' CAN
- interface index. When using recvfrom() instead of read() to retrieve BCM
- socket messages the originating CAN interface is provided in can_ifindex.
-
- 4.2.1 Broadcast Manager operations
-
- The opcode defines the operation for the broadcast manager to carry out,
- or details the broadcast managers response to several events, including
- user requests.
-
- Transmit Operations (user space to broadcast manager):
-
- TX_SETUP: Create (cyclic) transmission task.
-
- TX_DELETE: Remove (cyclic) transmission task, requires only can_id.
-
- TX_READ: Read properties of (cyclic) transmission task for can_id.
-
- TX_SEND: Send one CAN frame.
-
- Transmit Responses (broadcast manager to user space):
-
- TX_STATUS: Reply to TX_READ request (transmission task configuration).
-
- TX_EXPIRED: Notification when counter finishes sending at initial interval
- 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP.
-
- Receive Operations (user space to broadcast manager):
-
- RX_SETUP: Create RX content filter subscription.
-
- RX_DELETE: Remove RX content filter subscription, requires only can_id.
-
- RX_READ: Read properties of RX content filter subscription for can_id.
-
- Receive Responses (broadcast manager to user space):
-
- RX_STATUS: Reply to RX_READ request (filter task configuration).
-
- RX_TIMEOUT: Cyclic message is detected to be absent (timer ival1 expired).
-
- RX_CHANGED: BCM message with updated CAN frame (detected content change).
- Sent on first message received or on receipt of revised CAN messages.
-
- 4.2.2 Broadcast Manager message flags
-
- When sending a message to the broadcast manager the 'flags' element may
- contain the following flag definitions which influence the behaviour:
-
- SETTIMER: Set the values of ival1, ival2 and count
-
- STARTTIMER: Start the timer with the actual values of ival1, ival2
- and count. Starting the timer leads simultaneously to emit a CAN frame.
-
- TX_COUNTEVT: Create the message TX_EXPIRED when count expires
-
- TX_ANNOUNCE: A change of data by the process is emitted immediately.
-
- TX_CP_CAN_ID: Copies the can_id from the message header to each
- subsequent frame in frames. This is intended as usage simplification. For
- TX tasks the unique can_id from the message header may differ from the
- can_id(s) stored for transmission in the subsequent struct can_frame(s).
-
- RX_FILTER_ID: Filter by can_id alone, no frames required (nframes=0).
-
- RX_CHECK_DLC: A change of the DLC leads to an RX_CHANGED.
-
- RX_NO_AUTOTIMER: Prevent automatically starting the timeout monitor.
-
- RX_ANNOUNCE_RESUME: If passed at RX_SETUP and a receive timeout occurred, a
- RX_CHANGED message will be generated when the (cyclic) receive restarts.
-
- TX_RESET_MULTI_IDX: Reset the index for the multiple frame transmission.
-
- RX_RTR_FRAME: Send reply for RTR-request (placed in op->frames[0]).
-
- 4.2.3 Broadcast Manager transmission timers
-
- Periodic transmission configurations may use up to two interval timers.
- In this case the BCM sends a number of messages ('count') at an interval
- 'ival1', then continuing to send at another given interval 'ival2'. When
- only one timer is needed 'count' is set to zero and only 'ival2' is used.
- When SET_TIMER and START_TIMER flag were set the timers are activated.
- The timer values can be altered at runtime when only SET_TIMER is set.
-
- 4.2.4 Broadcast Manager message sequence transmission
-
- Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic
- TX task configuration. The number of CAN frames is provided in the 'nframes'
- element of the BCM message head. The defined number of CAN frames are added
- as array to the TX_SETUP BCM configuration message.
-
- /* create a struct to set up a sequence of four CAN frames */
- struct {
- struct bcm_msg_head msg_head;
- struct can_frame frame[4];
- } mytxmsg;
-
- (..)
- mytxmsg.msg_head.nframes = 4;
- (..)
-
- write(s, &mytxmsg, sizeof(mytxmsg));
-
- With every transmission the index in the array of CAN frames is increased
- and set to zero at index overflow.
-
- 4.2.5 Broadcast Manager receive filter timers
-
- The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP.
- When the SET_TIMER flag is set the timers are enabled:
-
- ival1: Send RX_TIMEOUT when a received message is not received again within
- the given time. When START_TIMER is set at RX_SETUP the timeout detection
- is activated directly - even without a former CAN frame reception.
-
- ival2: Throttle the received message rate down to the value of ival2. This
- is useful to reduce messages for the application when the signal inside the
- CAN frame is stateless as state changes within the ival2 periode may get
- lost.
-
- 4.2.6 Broadcast Manager multiplex message receive filter
-
- To filter for content changes in multiplex message sequences an array of more
- than one CAN frames can be passed in a RX_SETUP configuration message. The
- data bytes of the first CAN frame contain the mask of relevant bits that
- have to match in the subsequent CAN frames with the received CAN frame.
- If one of the subsequent CAN frames is matching the bits in that frame data
- mark the relevant content to be compared with the previous received content.
- Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN
- filters) can be added as array to the TX_SETUP BCM configuration message.
-
- /* usually used to clear CAN frame data[] - beware of endian problems! */
- #define U64_DATA(p) (*(unsigned long long*)(p)->data)
-
- struct {
- struct bcm_msg_head msg_head;
- struct can_frame frame[5];
- } msg;
-
- msg.msg_head.opcode = RX_SETUP;
- msg.msg_head.can_id = 0x42;
- msg.msg_head.flags = 0;
- msg.msg_head.nframes = 5;
- U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */
- U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */
- U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */
- U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */
- U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */
-
- write(s, &msg, sizeof(msg));
-
- 4.2.7 Broadcast Manager CAN FD support
-
- The programming API of the CAN_BCM depends on struct can_frame which is
- given as array directly behind the bcm_msg_head structure. To follow this
- schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head
- flags indicates that the concatenated CAN frame structures behind the
- bcm_msg_head are defined as struct canfd_frame.
-
- struct {
- struct bcm_msg_head msg_head;
- struct canfd_frame frame[5];
- } msg;
-
- msg.msg_head.opcode = RX_SETUP;
- msg.msg_head.can_id = 0x42;
- msg.msg_head.flags = CAN_FD_FRAME;
- msg.msg_head.nframes = 5;
- (..)
-
- When using CAN FD frames for multiplex filtering the MUX mask is still
- expected in the first 64 bit of the struct canfd_frame data section.
-
- 4.3 connected transport protocols (SOCK_SEQPACKET)
- 4.4 unconnected transport protocols (SOCK_DGRAM)
-
-
-5. SocketCAN core module
--------------------------
-
- The SocketCAN core module implements the protocol family
- PF_CAN. CAN protocol modules are loaded by the core module at
- runtime. The core module provides an interface for CAN protocol
- modules to subscribe needed CAN IDs (see chapter 3.1).
-
- 5.1 can.ko module params
-
- - stats_timer: To calculate the SocketCAN core statistics
- (e.g. current/maximum frames per second) this 1 second timer is
- invoked at can.ko module start time by default. This timer can be
- disabled by using stattimer=0 on the module commandline.
-
- - debug: (removed since SocketCAN SVN r546)
-
- 5.2 procfs content
-
- As described in chapter 3.1 the SocketCAN core uses several filter
- lists to deliver received CAN frames to CAN protocol modules. These
- receive lists, their filters and the count of filter matches can be
- checked in the appropriate receive list. All entries contain the
- device and a protocol module identifier:
-
- foo@bar:~$ cat /proc/net/can/rcvlist_all
-
- receive list 'rx_all':
- (vcan3: no entry)
- (vcan2: no entry)
- (vcan1: no entry)
- device can_id can_mask function userdata matches ident
- vcan0 000 00000000 f88e6370 f6c6f400 0 raw
- (any: no entry)
-
- In this example an application requests any CAN traffic from vcan0.
-
- rcvlist_all - list for unfiltered entries (no filter operations)
- rcvlist_eff - list for single extended frame (EFF) entries
- rcvlist_err - list for error message frames masks
- rcvlist_fil - list for mask/value filters
- rcvlist_inv - list for mask/value filters (inverse semantic)
- rcvlist_sff - list for single standard frame (SFF) entries
-
- Additional procfs files in /proc/net/can
-
- stats - SocketCAN core statistics (rx/tx frames, match ratios, ...)
- reset_stats - manual statistic reset
- version - prints the SocketCAN core version and the ABI version
-
- 5.3 writing own CAN protocol modules
-
- To implement a new protocol in the protocol family PF_CAN a new
- protocol has to be defined in include/linux/can.h .
- The prototypes and definitions to use the SocketCAN core can be
- accessed by including include/linux/can/core.h .
- In addition to functions that register the CAN protocol and the
- CAN device notifier chain there are functions to subscribe CAN
- frames received by CAN interfaces and to send CAN frames:
-
- can_rx_register - subscribe CAN frames from a specific interface
- can_rx_unregister - unsubscribe CAN frames from a specific interface
- can_send - transmit a CAN frame (optional with local loopback)
-
- For details see the kerneldoc documentation in net/can/af_can.c or
- the source code of net/can/raw.c or net/can/bcm.c .
-
-6. CAN network drivers
-----------------------
-
- Writing a CAN network device driver is much easier than writing a
- CAN character device driver. Similar to other known network device
- drivers you mainly have to deal with:
-
- - TX: Put the CAN frame from the socket buffer to the CAN controller.
- - RX: Put the CAN frame from the CAN controller to the socket buffer.
-
- See e.g. at Documentation/networking/netdevices.txt . The differences
- for writing CAN network device driver are described below:
-
- 6.1 general settings
-
- dev->type = ARPHRD_CAN; /* the netdevice hardware type */
- dev->flags = IFF_NOARP; /* CAN has no arp */
-
- dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */
-
- or alternative, when the controller supports CAN with flexible data rate:
- dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */
-
- The struct can_frame or struct canfd_frame is the payload of each socket
- buffer (skbuff) in the protocol family PF_CAN.
-
- 6.2 local loopback of sent frames
-
- As described in chapter 3.2 the CAN network device driver should
- support a local loopback functionality similar to the local echo
- e.g. of tty devices. In this case the driver flag IFF_ECHO has to be
- set to prevent the PF_CAN core from locally echoing sent frames
- (aka loopback) as fallback solution:
-
- dev->flags = (IFF_NOARP | IFF_ECHO);
-
- 6.3 CAN controller hardware filters
-
- To reduce the interrupt load on deep embedded systems some CAN
- controllers support the filtering of CAN IDs or ranges of CAN IDs.
- These hardware filter capabilities vary from controller to
- controller and have to be identified as not feasible in a multi-user
- networking approach. The use of the very controller specific
- hardware filters could make sense in a very dedicated use-case, as a
- filter on driver level would affect all users in the multi-user
- system. The high efficient filter sets inside the PF_CAN core allow
- to set different multiple filters for each socket separately.
- Therefore the use of hardware filters goes to the category 'handmade
- tuning on deep embedded systems'. The author is running a MPC603e
- @133MHz with four SJA1000 CAN controllers from 2002 under heavy bus
- load without any problems ...
-
- 6.4 The virtual CAN driver (vcan)
-
- Similar to the network loopback devices, vcan offers a virtual local
- CAN interface. A full qualified address on CAN consists of
-
- - a unique CAN Identifier (CAN ID)
- - the CAN bus this CAN ID is transmitted on (e.g. can0)
-
- so in common use cases more than one virtual CAN interface is needed.
-
- The virtual CAN interfaces allow the transmission and reception of CAN
- frames without real CAN controller hardware. Virtual CAN network
- devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ...
- When compiled as a module the virtual CAN driver module is called vcan.ko
-
- Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel
- netlink interface to create vcan network devices. The creation and
- removal of vcan network devices can be managed with the ip(8) tool:
-
- - Create a virtual CAN network interface:
- $ ip link add type vcan
-
- - Create a virtual CAN network interface with a specific name 'vcan42':
- $ ip link add dev vcan42 type vcan
-
- - Remove a (virtual CAN) network interface 'vcan42':
- $ ip link del vcan42
-
- 6.5 The CAN network device driver interface
-
- The CAN network device driver interface provides a generic interface
- to setup, configure and monitor CAN network devices. The user can then
- configure the CAN device, like setting the bit-timing parameters, via
- the netlink interface using the program "ip" from the "IPROUTE2"
- utility suite. The following chapter describes briefly how to use it.
- Furthermore, the interface uses a common data structure and exports a
- set of common functions, which all real CAN network device drivers
- should use. Please have a look to the SJA1000 or MSCAN driver to
- understand how to use them. The name of the module is can-dev.ko.
-
- 6.5.1 Netlink interface to set/get devices properties
-
- The CAN device must be configured via netlink interface. The supported
- netlink message types are defined and briefly described in
- "include/linux/can/netlink.h". CAN link support for the program "ip"
- of the IPROUTE2 utility suite is available and it can be used as shown
- below:
-
- - Setting CAN device properties:
-
- $ ip link set can0 type can help
- Usage: ip link set DEVICE type can
- [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
- [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
- phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
-
- [ dbitrate BITRATE [ dsample-point SAMPLE-POINT] ] |
- [ dtq TQ dprop-seg PROP_SEG dphase-seg1 PHASE-SEG1
- dphase-seg2 PHASE-SEG2 [ dsjw SJW ] ]
-
- [ loopback { on | off } ]
- [ listen-only { on | off } ]
- [ triple-sampling { on | off } ]
- [ one-shot { on | off } ]
- [ berr-reporting { on | off } ]
- [ fd { on | off } ]
- [ fd-non-iso { on | off } ]
- [ presume-ack { on | off } ]
-
- [ restart-ms TIME-MS ]
- [ restart ]
-
- Where: BITRATE := { 1..1000000 }
- SAMPLE-POINT := { 0.000..0.999 }
- TQ := { NUMBER }
- PROP-SEG := { 1..8 }
- PHASE-SEG1 := { 1..8 }
- PHASE-SEG2 := { 1..8 }
- SJW := { 1..4 }
- RESTART-MS := { 0 | NUMBER }
-
- - Display CAN device details and statistics:
-
- $ ip -details -statistics link show can0
- 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
- link/can
- can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
- bitrate 125000 sample_point 0.875
- tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
- sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
- clock 8000000
- re-started bus-errors arbit-lost error-warn error-pass bus-off
- 41 17457 0 41 42 41
- RX: bytes packets errors dropped overrun mcast
- 140859 17608 17457 0 0 0
- TX: bytes packets errors dropped carrier collsns
- 861 112 0 41 0 0
-
- More info to the above output:
-
- "<TRIPLE-SAMPLING>"
- Shows the list of selected CAN controller modes: LOOPBACK,
- LISTEN-ONLY, or TRIPLE-SAMPLING.
-
- "state ERROR-ACTIVE"
- The current state of the CAN controller: "ERROR-ACTIVE",
- "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
-
- "restart-ms 100"
- Automatic restart delay time. If set to a non-zero value, a
- restart of the CAN controller will be triggered automatically
- in case of a bus-off condition after the specified delay time
- in milliseconds. By default it's off.
-
- "bitrate 125000 sample-point 0.875"
- Shows the real bit-rate in bits/sec and the sample-point in the
- range 0.000..0.999. If the calculation of bit-timing parameters
- is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
- bit-timing can be defined by setting the "bitrate" argument.
- Optionally the "sample-point" can be specified. By default it's
- 0.000 assuming CIA-recommended sample-points.
-
- "tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
- Shows the time quanta in ns, propagation segment, phase buffer
- segment 1 and 2 and the synchronisation jump width in units of
- tq. They allow to define the CAN bit-timing in a hardware
- independent format as proposed by the Bosch CAN 2.0 spec (see
- chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
-
- "sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
- clock 8000000"
- Shows the bit-timing constants of the CAN controller, here the
- "sja1000". The minimum and maximum values of the time segment 1
- and 2, the synchronisation jump width in units of tq, the
- bitrate pre-scaler and the CAN system clock frequency in Hz.
- These constants could be used for user-defined (non-standard)
- bit-timing calculation algorithms in user-space.
-
- "re-started bus-errors arbit-lost error-warn error-pass bus-off"
- Shows the number of restarts, bus and arbitration lost errors,
- and the state changes to the error-warning, error-passive and
- bus-off state. RX overrun errors are listed in the "overrun"
- field of the standard network statistics.
-
- 6.5.2 Setting the CAN bit-timing
-
- The CAN bit-timing parameters can always be defined in a hardware
- independent format as proposed in the Bosch CAN 2.0 specification
- specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
- and "sjw":
-
- $ ip link set canX type can tq 125 prop-seg 6 \
- phase-seg1 7 phase-seg2 2 sjw 1
-
- If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
- recommended CAN bit-timing parameters will be calculated if the bit-
- rate is specified with the argument "bitrate":
-
- $ ip link set canX type can bitrate 125000
-
- Note that this works fine for the most common CAN controllers with
- standard bit-rates but may *fail* for exotic bit-rates or CAN system
- clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
- space and allows user-space tools to solely determine and set the
- bit-timing parameters. The CAN controller specific bit-timing
- constants can be used for that purpose. They are listed by the
- following command:
-
- $ ip -details link show can0
- ...
- sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
-
- 6.5.3 Starting and stopping the CAN network device
-
- A CAN network device is started or stopped as usual with the command
- "ifconfig canX up/down" or "ip link set canX up/down". Be aware that
- you *must* define proper bit-timing parameters for real CAN devices
- before you can start it to avoid error-prone default settings:
-
- $ ip link set canX up type can bitrate 125000
-
- A device may enter the "bus-off" state if too many errors occurred on
- the CAN bus. Then no more messages are received or sent. An automatic
- bus-off recovery can be enabled by setting the "restart-ms" to a
- non-zero value, e.g.:
-
- $ ip link set canX type can restart-ms 100
-
- Alternatively, the application may realize the "bus-off" condition
- by monitoring CAN error message frames and do a restart when
- appropriate with the command:
-
- $ ip link set canX type can restart
-
- Note that a restart will also create a CAN error message frame (see
- also chapter 3.3).
-
- 6.6 CAN FD (flexible data rate) driver support
-
- CAN FD capable CAN controllers support two different bitrates for the
- arbitration phase and the payload phase of the CAN FD frame. Therefore a
- second bit timing has to be specified in order to enable the CAN FD bitrate.
-
- Additionally CAN FD capable CAN controllers support up to 64 bytes of
- payload. The representation of this length in can_frame.can_dlc and
- canfd_frame.len for userspace applications and inside the Linux network
- layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
- The data length code was a 1:1 mapping to the payload length in the legacy
- CAN frames anyway. The payload length to the bus-relevant DLC mapping is
- only performed inside the CAN drivers, preferably with the helper
- functions can_dlc2len() and can_len2dlc().
-
- The CAN netdevice driver capabilities can be distinguished by the network
- devices maximum transfer unit (MTU):
-
- MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device
- MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device
-
- The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
- N.B. CAN FD capable devices can also handle and send legacy CAN frames.
-
- When configuring CAN FD capable CAN controllers an additional 'data' bitrate
- has to be set. This bitrate for the data phase of the CAN FD frame has to be
- at least the bitrate which was configured for the arbitration phase. This
- second bitrate is specified analogue to the first bitrate but the bitrate
- setting keywords for the 'data' bitrate start with 'd' e.g. dbitrate,
- dsample-point, dsjw or dtq and similar settings. When a data bitrate is set
- within the configuration process the controller option "fd on" can be
- specified to enable the CAN FD mode in the CAN controller. This controller
- option also switches the device MTU to 72 (CANFD_MTU).
-
- The first CAN FD specification presented as whitepaper at the International
- CAN Conference 2012 needed to be improved for data integrity reasons.
- Therefore two CAN FD implementations have to be distinguished today:
-
- - ISO compliant: The ISO 11898-1:2015 CAN FD implementation (default)
- - non-ISO compliant: The CAN FD implementation following the 2012 whitepaper
-
- Finally there are three types of CAN FD controllers:
-
- 1. ISO compliant (fixed)
- 2. non-ISO compliant (fixed, like the M_CAN IP core v3.0.1 in m_can.c)
- 3. ISO/non-ISO CAN FD controllers (switchable, like the PEAK PCAN-USB FD)
-
- The current ISO/non-ISO mode is announced by the CAN controller driver via
- netlink and displayed by the 'ip' tool (controller option FD-NON-ISO).
- The ISO/non-ISO-mode can be altered by setting 'fd-non-iso {on|off}' for
- switchable CAN FD controllers only.
-
- Example configuring 500 kbit/s arbitration bitrate and 4 Mbit/s data bitrate:
-
- $ ip link set can0 up type can bitrate 500000 sample-point 0.75 \
- dbitrate 4000000 dsample-point 0.8 fd on
- $ ip -details link show can0
- 5: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 72 qdisc pfifo_fast state UNKNOWN \
- mode DEFAULT group default qlen 10
- link/can promiscuity 0
- can <FD> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
- bitrate 500000 sample-point 0.750
- tq 50 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 1
- pcan_usb_pro_fd: tseg1 1..64 tseg2 1..16 sjw 1..16 brp 1..1024 \
- brp-inc 1
- dbitrate 4000000 dsample-point 0.800
- dtq 12 dprop-seg 7 dphase-seg1 8 dphase-seg2 4 dsjw 1
- pcan_usb_pro_fd: dtseg1 1..16 dtseg2 1..8 dsjw 1..4 dbrp 1..1024 \
- dbrp-inc 1
- clock 80000000
-
- Example when 'fd-non-iso on' is added on this switchable CAN FD adapter:
- can <FD,FD-NON-ISO> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
-
- 6.7 Supported CAN hardware
-
- Please check the "Kconfig" file in "drivers/net/can" to get an actual
- list of the support CAN hardware. On the SocketCAN project website
- (see chapter 7) there might be further drivers available, also for
- older kernel versions.
-
-7. SocketCAN resources
------------------------
-
- The Linux CAN / SocketCAN project resources (project site / mailing list)
- are referenced in the MAINTAINERS file in the Linux source tree.
- Search for CAN NETWORK [LAYERS|DRIVERS].
-
-8. Credits
-----------
-
- Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
- Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
- Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
- Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews,
- CAN device driver interface, MSCAN driver)
- Robert Schwebel (design reviews, PTXdist integration)
- Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
- Benedikt Spranger (reviews)
- Thomas Gleixner (LKML reviews, coding style, posting hints)
- Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
- Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
- Klaus Hitschler (PEAK driver integration)
- Uwe Koppe (CAN netdevices with PF_PACKET approach)
- Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
- Pavel Pisa (Bit-timing calculation)
- Sascha Hauer (SJA1000 platform driver)
- Sebastian Haas (SJA1000 EMS PCI driver)
- Markus Plessing (SJA1000 EMS PCI driver)
- Per Dalen (SJA1000 Kvaser PCI driver)
- Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 7d4b159..90966c2 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -7,6 +7,7 @@ Contents:
:maxdepth: 2
batman-adv
+ can
kapi
z8530book
msg_zerocopy
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index 2c4e335..d2fd78f 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -12,8 +12,8 @@ suitable sample script and configure that.
On a dual CPU:
ps aux | grep pkt
-root 129 0.3 0.0 0 0 ? SW 2003 523:20 [pktgen/0]
-root 130 0.3 0.0 0 0 ? SW 2003 509:50 [pktgen/1]
+root 129 0.3 0.0 0 0 ? SW 2003 523:20 [kpktgend_0]
+root 130 0.3 0.0 0 0 ? SW 2003 509:50 [kpktgend_1]
For monitoring and control pktgen creates:
@@ -113,9 +113,16 @@ Configuring devices
===================
This is done via the /proc interface, and most easily done via pgset
as defined in the sample scripts.
+You need to specify PGDEV environment variable to use functions from sample
+scripts, i.e.:
+export PGDEV=/proc/net/pktgen/eth4@0
+source samples/pktgen/functions.sh
Examples:
+ pg_ctrl start starts injection.
+ pg_ctrl stop aborts injection. Also, ^C aborts generator.
+
pgset "clone_skb 1" sets the number of copies of the same packet
pgset "clone_skb 0" use single SKB for all transmits
pgset "burst 8" uses xmit_more API to queue 8 copies of the same
@@ -165,8 +172,12 @@ Examples:
IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
NODE_ALLOC # node specific memory allocation
NO_TIMESTAMP # disable timestamping
+ pgset 'flag ![name]' Clear a flag to determine behaviour.
+ Note that you might need to use single quote in
+ interactive mode, so that your shell wouldn't expand
+ the specified flag as a history command.
- pgset spi SPI_VALUE Set specific SA used to transform packet.
+ pgset "spi [SPI_VALUE]" Set specific SA used to transform packet.
pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then
cycle through the port range.
@@ -207,8 +218,6 @@ Examples:
pgset "tos XX" set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00)
pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00)
- pgset stop aborts injection. Also, ^C aborts generator.
-
pgset "rate 300M" set rate to 300 Mb/s
pgset "ratep 1000000" set rate to 1Mpps
diff --git a/Documentation/networking/xfrm_device.txt b/Documentation/networking/xfrm_device.txt
index 2d9d588c..50c34ca 100644
--- a/Documentation/networking/xfrm_device.txt
+++ b/Documentation/networking/xfrm_device.txt
@@ -41,6 +41,7 @@ struct xfrmdev_ops {
void (*xdo_dev_state_free) (struct xfrm_state *x);
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
+ void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
};
The NIC driver offering ipsec offload will need to implement these
@@ -117,6 +118,8 @@ the stack in xfrm_input().
hand the packet to napi_gro_receive() as usual
+In ESN mode, xdo_dev_state_advance_esn() is called from xfrm_replay_advance_esn().
+Driver will check packet seq number and update HW ESN state machine if needed.
When the SA is removed by the user, the driver's xdo_dev_state_delete()
is asked to disable the offload. Later, xdo_dev_state_free() is called
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 57d3ee9..fc3ae95 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
or if no page table is present for the addresses (e.g. when using
hugepages).
+4.108 KVM_PPC_GET_CPU_CHAR
+
+Capability: KVM_CAP_PPC_GET_CPU_CHAR
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_ppc_cpu_char (out)
+Returns: 0 on successful completion
+ -EFAULT if struct kvm_ppc_cpu_char cannot be written
+
+This ioctl gives userspace information about certain characteristics
+of the CPU relating to speculative execution of instructions and
+possible information leakage resulting from speculative execution (see
+CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is
+returned in struct kvm_ppc_cpu_char, which looks like this:
+
+struct kvm_ppc_cpu_char {
+ __u64 character; /* characteristics of the CPU */
+ __u64 behaviour; /* recommended software behaviour */
+ __u64 character_mask; /* valid bits in character */
+ __u64 behaviour_mask; /* valid bits in behaviour */
+};
+
+For extensibility, the character_mask and behaviour_mask fields
+indicate which bits of character and behaviour have been filled in by
+the kernel. If the set of defined bits is extended in future then
+userspace will be able to tell whether it is running on a kernel that
+knows about the new bits.
+
+The character field describes attributes of the CPU which can help
+with preventing inadvertent information disclosure - specifically,
+whether there is an instruction to flash-invalidate the L1 data cache
+(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set
+to a mode where entries can only be used by the thread that created
+them, whether the bcctr[l] instruction prevents speculation, and
+whether a speculation barrier instruction (ori 31,31,0) is provided.
+
+The behaviour field describes actions that software should take to
+prevent inadvertent information disclosure, and thus describes which
+vulnerabilities the hardware is subject to; specifically whether the
+L1 data cache should be flushed when returning to user mode from the
+kernel, and whether a speculation barrier should be placed between an
+array bounds check and the array access.
+
+These fields use the same bit definitions as the new
+H_GET_CPU_CHARACTERISTICS hypercall.
+
5. The kvm_run structure
------------------------
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
index d11eff6..5cd5843 100644
--- a/Documentation/x86/pti.txt
+++ b/Documentation/x86/pti.txt
@@ -78,7 +78,7 @@ this protection comes at a cost:
non-PTI SYSCALL entry code, so requires mapping fewer
things into the userspace page tables. The downside is
that stacks must be switched at entry time.
- d. Global pages are disabled for all kernel structures not
+ c. Global pages are disabled for all kernel structures not
mapped into both kernel and userspace page tables. This
feature of the MMU allows different processes to share TLB
entries mapping the kernel. Losing the feature means more
diff --git a/MAINTAINERS b/MAINTAINERS
index 079af8b..884ee96 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -62,7 +62,15 @@ trivial patch so apply some common sense.
7. When sending security related changes or reports to a maintainer
please Cc: security@kernel.org, especially if the maintainer
- does not respond.
+ does not respond. Please keep in mind that the security team is
+ a small set of people who can be efficient only when working on
+ verified bugs. Please only Cc: this list when you have identified
+ that the bug would present a short-term risk to other users if it
+ were publicly disclosed. For example, reports of address leaks do
+ not represent an immediate threat and are better handled publicly,
+ and ideally, should come with a patch proposal. Please do not send
+ automated reports to this list either. Such bugs will be handled
+ better and faster in the usual public places.
8. Happy hacking.
@@ -3198,7 +3206,7 @@ W: https://github.com/linux-can
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
S: Maintained
-F: Documentation/networking/can.txt
+F: Documentation/networking/can.rst
F: net/can/
F: include/linux/can/core.h
F: include/uapi/linux/can.h
@@ -9102,6 +9110,7 @@ F: drivers/usb/image/microtek.*
MIPS
M: Ralf Baechle <ralf@linux-mips.org>
+M: James Hogan <jhogan@kernel.org>
L: linux-mips@linux-mips.org
W: http://www.linux-mips.org/
T: git git://git.linux-mips.org/pub/scm/ralf/linux.git
@@ -12252,7 +12261,7 @@ M: Security Officers <security@kernel.org>
S: Supported
SECURITY SUBSYSTEM
-M: James Morris <james.l.morris@oracle.com>
+M: James Morris <jmorris@namei.org>
M: "Serge E. Hallyn" <serge@hallyn.com>
L: linux-security-module@vger.kernel.org (suggested Cc:)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git
diff --git a/Makefile b/Makefile
index bf5b8cb..339397b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 15
SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION = -rc9
NAME = Fearless Coyote
# *DOCUMENTATION*
diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c
index 37bd6d9..a6bdc1d 100644
--- a/arch/alpha/kernel/sys_sio.c
+++ b/arch/alpha/kernel/sys_sio.c
@@ -102,6 +102,15 @@ sio_pci_route(void)
alpha_mv.sys.sio.route_tab);
}
+static bool sio_pci_dev_irq_needs_level(const struct pci_dev *dev)
+{
+ if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) &&
+ (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA))
+ return false;
+
+ return true;
+}
+
static unsigned int __init
sio_collect_irq_levels(void)
{
@@ -110,8 +119,7 @@ sio_collect_irq_levels(void)
/* Iterate through the devices, collecting IRQ levels. */
for_each_pci_dev(dev) {
- if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) &&
- (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA))
+ if (!sio_pci_dev_irq_needs_level(dev))
continue;
if (dev->irq)
@@ -120,8 +128,7 @@ sio_collect_irq_levels(void)
return level_bits;
}
-static void __init
-sio_fixup_irq_levels(unsigned int level_bits)
+static void __sio_fixup_irq_levels(unsigned int level_bits, bool reset)
{
unsigned int old_level_bits;
@@ -139,12 +146,21 @@ sio_fixup_irq_levels(unsigned int level_bits)
*/
old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8);
- level_bits |= (old_level_bits & 0x71ff);
+ if (reset)
+ old_level_bits &= 0x71ff;
+
+ level_bits |= old_level_bits;
outb((level_bits >> 0) & 0xff, 0x4d0);
outb((level_bits >> 8) & 0xff, 0x4d1);
}
+static inline void
+sio_fixup_irq_levels(unsigned int level_bits)
+{
+ __sio_fixup_irq_levels(level_bits, true);
+}
+
static inline int
noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
@@ -181,7 +197,14 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5;
int irq = COMMON_TABLE_LOOKUP, tmp;
tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq);
- return irq >= 0 ? tmp : -1;
+
+ irq = irq >= 0 ? tmp : -1;
+
+ /* Fixup IRQ level if an actual IRQ mapping is detected */
+ if (sio_pci_dev_irq_needs_level(dev) && irq >= 0)
+ __sio_fixup_irq_levels(1 << irq, false);
+
+ return irq;
}
static inline int
diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S
index 316a99a..1cfcfbb 100644
--- a/arch/alpha/lib/ev6-memset.S
+++ b/arch/alpha/lib/ev6-memset.S
@@ -18,7 +18,7 @@
* The algorithm for the leading and trailing quadwords remains the same,
* however the loop has been unrolled to enable better memory throughput,
* and the code has been replicated for each of the entry points: __memset
- * and __memsetw to permit better scheduling to eliminate the stalling
+ * and __memset16 to permit better scheduling to eliminate the stalling
* encountered during the mask replication.
* A future enhancement might be to put in a byte store loop for really
* small (say < 32 bytes) memset()s. Whether or not that change would be
@@ -34,7 +34,7 @@
.globl memset
.globl __memset
.globl ___memset
- .globl __memsetw
+ .globl __memset16
.globl __constant_c_memset
.ent ___memset
@@ -415,9 +415,9 @@ end:
* to mask stalls. Note that entry point names also had to change
*/
.align 5
- .ent __memsetw
+ .ent __memset16
-__memsetw:
+__memset16:
.frame $30,0,$26,0
.prologue 0
@@ -596,8 +596,8 @@ end_w:
nop
ret $31,($26),1 # L0 :
- .end __memsetw
- EXPORT_SYMBOL(__memsetw)
+ .end __memset16
+ EXPORT_SYMBOL(__memset16)
memset = ___memset
__memset = ___memset
diff --git a/arch/arm/boot/dts/imx6q-b450v3.dts b/arch/arm/boot/dts/imx6q-b450v3.dts
index 404a93d..3ec5850 100644
--- a/arch/arm/boot/dts/imx6q-b450v3.dts
+++ b/arch/arm/boot/dts/imx6q-b450v3.dts
@@ -112,3 +112,55 @@
line-name = "PCA9539-P07";
};
};
+
+&pci_root {
+ /* Intel Corporation I210 Gigabit Network Connection */
+ switch_nic: ethernet@3,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00010000 0 0 0 0>;
+ };
+};
+
+&switch_ports {
+ port@0 {
+ reg = <0>;
+ label = "enacq";
+ phy-handle = <&switchphy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "eneport1";
+ phy-handle = <&switchphy1>;
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "enix";
+ phy-handle = <&switchphy2>;
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "enid";
+ phy-handle = <&switchphy3>;
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "cpu";
+ ethernet = <&switch_nic>;
+ phy-handle = <&switchphy4>;
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "enembc";
+
+ /* connected to Ethernet MAC of AT91RM9200 in MII mode */
+ fixed-link {
+ speed = <100>;
+ full-duplex;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/imx6q-b650v3.dts b/arch/arm/boot/dts/imx6q-b650v3.dts
index 7f9f176..5650a9b 100644
--- a/arch/arm/boot/dts/imx6q-b650v3.dts
+++ b/arch/arm/boot/dts/imx6q-b650v3.dts
@@ -111,3 +111,55 @@
fsl,tx-cal-45-dp-ohms = <55>;
fsl,tx-d-cal = <100>;
};
+
+&pci_root {
+ /* Intel Corporation I210 Gigabit Network Connection */
+ switch_nic: ethernet@3,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00010000 0 0 0 0>;
+ };
+};
+
+&switch_ports {
+ port@0 {
+ reg = <0>;
+ label = "enacq";
+ phy-handle = <&switchphy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "eneport1";
+ phy-handle = <&switchphy1>;
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "enix";
+ phy-handle = <&switchphy2>;
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "enid";
+ phy-handle = <&switchphy3>;
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "cpu";
+ ethernet = <&switch_nic>;
+ phy-handle = <&switchphy4>;
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "enembc";
+
+ /* connected to Ethernet MAC of AT91RM9200 in MII mode */
+ fixed-link {
+ speed = <100>;
+ full-duplex;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/imx6q-b850v3.dts b/arch/arm/boot/dts/imx6q-b850v3.dts
index 46bdc67..35edbdc 100644
--- a/arch/arm/boot/dts/imx6q-b850v3.dts
+++ b/arch/arm/boot/dts/imx6q-b850v3.dts
@@ -212,3 +212,78 @@
};
};
};
+
+&pci_root {
+ /* PLX Technology, Inc. PEX 8605 PCI Express 4-port Gen2 Switch */
+ bridge@1,0 {
+ compatible = "pci10b5,8605";
+ reg = <0x00010000 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ bridge@2,1 {
+ compatible = "pci10b5,8605";
+ reg = <0x00020800 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ /* Intel Corporation I210 Gigabit Network Connection */
+ ethernet@3,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00030000 0 0 0 0>;
+ };
+ };
+
+ bridge@2,2 {
+ compatible = "pci10b5,8605";
+ reg = <0x00021000 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ /* Intel Corporation I210 Gigabit Network Connection */
+ switch_nic: ethernet@4,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00040000 0 0 0 0>;
+ };
+ };
+ };
+};
+
+&switch_ports {
+ port@0 {
+ reg = <0>;
+ label = "eneport1";
+ phy-handle = <&switchphy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "eneport2";
+ phy-handle = <&switchphy1>;
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "enix";
+ phy-handle = <&switchphy2>;
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "enid";
+ phy-handle = <&switchphy3>;
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "cpu";
+ ethernet = <&switch_nic>;
+ phy-handle = <&switchphy4>;
+ };
+};
diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi
index b915837..916ea94 100644
--- a/arch/arm/boot/dts/imx6q-bx50v3.dtsi
+++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi
@@ -92,6 +92,56 @@
mux-int-port = <1>;
mux-ext-port = <4>;
};
+
+ aliases {
+ mdio-gpio0 = &mdio0;
+ };
+
+ mdio0: mdio-gpio {
+ compatible = "virtual,mdio-gpio";
+ gpios = <&gpio2 5 GPIO_ACTIVE_HIGH>, /* mdc */
+ <&gpio2 7 GPIO_ACTIVE_HIGH>; /* mdio */
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch@0 {
+ compatible = "marvell,mv88e6085"; /* 88e6240*/
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ switch_ports: ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switchphy0: switchphy@0 {
+ reg = <0>;
+ };
+
+ switchphy1: switchphy@1 {
+ reg = <1>;
+ };
+
+ switchphy2: switchphy@2 {
+ reg = <2>;
+ };
+
+ switchphy3: switchphy@3 {
+ reg = <3>;
+ };
+
+ switchphy4: switchphy@4 {
+ reg = <4>;
+ };
+ };
+ };
+ };
};
&ecspi5 {
@@ -326,3 +376,15 @@
tcxo-clock-frequency = <26000000>;
};
};
+
+&pcie {
+ /* Synopsys, Inc. Device */
+ pci_root: root@0,0 {
+ compatible = "pci16c3,abcd";
+ reg = <0x00000000 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ };
+};
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 304203f..e60494f 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -45,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = kvm_psci_call(vcpu);
if (ret < 0) {
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -54,7 +54,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 350a990..8e0b370 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -259,6 +259,7 @@ config BCM47XX
select LEDS_GPIO_REGISTER
select BCM47XX_NVRAM
select BCM47XX_SPROM
+ select BCM47XX_SSB if !BCM47XX_BCMA
help
Support for BCM47XX based boards
@@ -389,6 +390,7 @@ config LANTIQ
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_MIPS16
select SYS_SUPPORTS_MULTITHREADING
+ select SYS_SUPPORTS_VPE_LOADER
select SYS_HAS_EARLY_PRINTK
select GPIOLIB
select SWAP_IO_SPACE
@@ -516,6 +518,7 @@ config MIPS_MALTA
select SYS_SUPPORTS_MIPS16
select SYS_SUPPORTS_MULTITHREADING
select SYS_SUPPORTS_SMARTMIPS
+ select SYS_SUPPORTS_VPE_LOADER
select SYS_SUPPORTS_ZBOOT
select SYS_SUPPORTS_RELOCATABLE
select USE_OF
@@ -2281,9 +2284,16 @@ config MIPSR2_TO_R6_EMULATOR
The only reason this is a build-time option is to save ~14K from the
final kernel image.
+config SYS_SUPPORTS_VPE_LOADER
+ bool
+ depends on SYS_SUPPORTS_MULTITHREADING
+ help
+ Indicates that the platform supports the VPE loader, and provides
+ physical_memsize.
+
config MIPS_VPE_LOADER
bool "VPE loader support."
- depends on SYS_SUPPORTS_MULTITHREADING && MODULES
+ depends on SYS_SUPPORTS_VPE_LOADER && MODULES
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
select MIPS_MT
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 464af5e..0749c37 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -124,30 +124,36 @@ config SCACHE_DEBUGFS
If unsure, say N.
-menuconfig MIPS_CPS_NS16550
+menuconfig MIPS_CPS_NS16550_BOOL
bool "CPS SMP NS16550 UART output"
depends on MIPS_CPS
help
Output debug information via an ns16550 compatible UART if exceptions
occur early in the boot process of a secondary core.
-if MIPS_CPS_NS16550
+if MIPS_CPS_NS16550_BOOL
+
+config MIPS_CPS_NS16550
+ def_bool MIPS_CPS_NS16550_BASE != 0
config MIPS_CPS_NS16550_BASE
hex "UART Base Address"
default 0x1b0003f8 if MIPS_MALTA
+ default 0
help
The base address of the ns16550 compatible UART on which to output
debug information from the early stages of core startup.
+ This is only used if non-zero.
+
config MIPS_CPS_NS16550_SHIFT
int "UART Register Shift"
- default 0 if MIPS_MALTA
+ default 0
help
The number of bits to shift ns16550 register indices by in order to
form their addresses. That is, log base 2 of the span between
adjacent ns16550 registers in the system.
-endif # MIPS_CPS_NS16550
+endif # MIPS_CPS_NS16550_BOOL
endmenu
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index 4674f1e..e1675c2 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -575,7 +575,7 @@ static int __init ar7_register_uarts(void)
uart_port.type = PORT_AR7;
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
uart_port.iotype = UPIO_MEM32;
- uart_port.flags = UPF_FIXED_TYPE;
+ uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF;
uart_port.regshift = 2;
uart_port.line = 0;
diff --git a/arch/mips/ath25/devices.c b/arch/mips/ath25/devices.c
index e115634..301a902 100644
--- a/arch/mips/ath25/devices.c
+++ b/arch/mips/ath25/devices.c
@@ -73,6 +73,7 @@ const char *get_system_type(void)
void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk)
{
+#ifdef CONFIG_SERIAL_8250_CONSOLE
struct uart_port s;
memset(&s, 0, sizeof(s));
@@ -85,6 +86,7 @@ void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk)
s.uartclk = uartclk;
early_serial_setup(&s);
+#endif /* CONFIG_SERIAL_8250_CONSOLE */
}
int __init ath25_add_wmac(int nr, u32 base, int irq)
diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index dd5567b..8f5bd04 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -292,7 +292,6 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core,
*this_cpu_ptr(&cm_core_lock_flags));
} else {
WARN_ON(cluster != 0);
- WARN_ON(vp != 0);
WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL);
/*
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 78c2aff..e84e126 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -16,4 +16,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o
obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o
# libgcc-style stuff needed in the kernel
-obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o
+obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \
+ ucmpdi2.o
diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h
index 28002ed..199a7f9 100644
--- a/arch/mips/lib/libgcc.h
+++ b/arch/mips/lib/libgcc.h
@@ -10,10 +10,18 @@ typedef int word_type __attribute__ ((mode (__word__)));
struct DWstruct {
int high, low;
};
+
+struct TWstruct {
+ long long high, low;
+};
#elif defined(__LITTLE_ENDIAN)
struct DWstruct {
int low, high;
};
+
+struct TWstruct {
+ long long low, high;
+};
#else
#error I feel sick.
#endif
@@ -23,4 +31,13 @@ typedef union {
long long ll;
} DWunion;
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6)
+typedef int ti_type __attribute__((mode(TI)));
+
+typedef union {
+ struct TWstruct s;
+ ti_type ti;
+} TWunion;
+#endif
+
#endif /* __ASM_LIBGCC_H */
diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c
new file mode 100644
index 0000000..111ad47
--- /dev/null
+++ b/arch/mips/lib/multi3.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+
+#include "libgcc.h"
+
+/*
+ * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
+ * specific case only we'll implement it here.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
+ */
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
+
+/* multiply 64-bit values, low 64-bits returned */
+static inline long long notrace dmulu(long long a, long long b)
+{
+ long long res;
+
+ asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */
+static inline long long notrace dmuhu(long long a, long long b)
+{
+ long long res;
+
+ asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+/* multiply 128-bit values, low 128-bits returned */
+ti_type notrace __multi3(ti_type a, ti_type b)
+{
+ TWunion res, aa, bb;
+
+ aa.ti = a;
+ bb.ti = b;
+
+ /*
+ * a * b = (a.lo * b.lo)
+ * + 2^64 * (a.hi * b.lo + a.lo * b.hi)
+ * [+ 2^128 * (a.hi * b.hi)]
+ */
+ res.s.low = dmulu(aa.s.low, bb.s.low);
+ res.s.high = dmuhu(aa.s.low, bb.s.low);
+ res.s.high += dmulu(aa.s.high, bb.s.low);
+ res.s.high += dmulu(aa.s.low, bb.s.high);
+
+ return res.ti;
+}
+EXPORT_SYMBOL(__multi3);
+
+#endif /* 64BIT && CPU_MIPSR6 && GCC7 */
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index cdb5a19..9bb6baa 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -40,7 +40,7 @@
#include "uasm.c"
-static const struct insn const insn_table_MM[insn_invalid] = {
+static const struct insn insn_table_MM[insn_invalid] = {
[insn_addu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD},
[insn_addiu] = {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
[insn_and] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD},
diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c
index d4469b2..4f46a45 100644
--- a/arch/mips/ralink/timer.c
+++ b/arch/mips/ralink/timer.c
@@ -109,9 +109,9 @@ static int rt_timer_probe(struct platform_device *pdev)
}
rt->irq = platform_get_irq(pdev, 0);
- if (!rt->irq) {
+ if (rt->irq < 0) {
dev_err(&pdev->dev, "failed to load irq\n");
- return -ENOENT;
+ return rt->irq;
}
rt->membase = devm_ioremap_resource(&pdev->dev, res);
diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile
index efdecdb..8186afc 100644
--- a/arch/mips/rb532/Makefile
+++ b/arch/mips/rb532/Makefile
@@ -2,4 +2,6 @@
# Makefile for the RB532 board specific parts of the kernel
#
-obj-y += irq.o time.o setup.o serial.o prom.o gpio.o devices.o
+obj-$(CONFIG_SERIAL_8250_CONSOLE) += serial.o
+
+obj-y += irq.o time.o setup.o prom.o gpio.o devices.o
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 32ea3e6..354d258 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -310,6 +310,8 @@ static int __init plat_setup_devices(void)
return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs));
}
+#ifdef CONFIG_NET
+
static int __init setup_kmac(char *s)
{
printk(KERN_INFO "korina mac = %s\n", s);
@@ -322,4 +324,6 @@ static int __init setup_kmac(char *s)
__setup("kmac=", setup_kmac);
+#endif /* CONFIG_NET */
+
arch_initcall(plat_setup_devices);
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 61d6049..637b726 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info {
__u32 ap_encodings[8];
};
+/* For KVM_PPC_GET_CPU_CHAR */
+struct kvm_ppc_cpu_char {
+ __u64 character; /* characteristics of the CPU */
+ __u64 behaviour; /* recommended software behaviour */
+ __u64 character_mask; /* valid bits in character */
+ __u64 behaviour_mask; /* valid bits in behaviour */
+};
+
+/*
+ * Values for character and character_mask.
+ * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
+ */
+#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63)
+#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60)
+#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59)
+#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58)
+#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57)
+#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56)
+
+#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63)
+#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62)
+#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61)
+
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1915e86..0a7c887 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,6 +39,10 @@
#include <asm/iommu.h>
#include <asm/switch_to.h>
#include <asm/xive.h>
+#ifdef CONFIG_PPC_PSERIES
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#endif
#include "timing.h"
#include "irq.h"
@@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS:
#endif
+ case KVM_CAP_PPC_GET_CPU_CHAR:
r = 1;
break;
@@ -1759,6 +1764,124 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return r;
}
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * These functions check whether the underlying hardware is safe
+ * against attacks based on observing the effects of speculatively
+ * executed instructions, and whether it supplies instructions for
+ * use in workarounds. The information comes from firmware, either
+ * via the device tree on powernv platforms or from an hcall on
+ * pseries platforms.
+ */
+#ifdef CONFIG_PPC_PSERIES
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+ struct h_cpu_char_result c;
+ unsigned long rc;
+
+ if (!machine_is(pseries))
+ return -ENOTTY;
+
+ rc = plpar_get_cpu_characteristics(&c);
+ if (rc == H_SUCCESS) {
+ cp->character = c.character;
+ cp->behaviour = c.behaviour;
+ cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+ KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+ KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+ KVM_PPC_CPU_CHAR_BR_HINT_HONOURED |
+ KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF |
+ KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+ cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+ KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+ KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+ }
+ return 0;
+}
+#else
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+ return -ENOTTY;
+}
+#endif
+
+static inline bool have_fw_feat(struct device_node *fw_features,
+ const char *state, const char *name)
+{
+ struct device_node *np;
+ bool r = false;
+
+ np = of_get_child_by_name(fw_features, name);
+ if (np) {
+ r = of_property_read_bool(np, state);
+ of_node_put(np);
+ }
+ return r;
+}
+
+static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+ struct device_node *np, *fw_features;
+ int r;
+
+ memset(cp, 0, sizeof(*cp));
+ r = pseries_get_cpu_char(cp);
+ if (r != -ENOTTY)
+ return r;
+
+ np = of_find_node_by_name(NULL, "ibm,opal");
+ if (np) {
+ fw_features = of_get_child_by_name(np, "fw-features");
+ of_node_put(np);
+ if (!fw_features)
+ return 0;
+ if (have_fw_feat(fw_features, "enabled",
+ "inst-spec-barrier-ori31,31,0"))
+ cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31;
+ if (have_fw_feat(fw_features, "enabled",
+ "fw-bcctrl-serialized"))
+ cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED;
+ if (have_fw_feat(fw_features, "enabled",
+ "inst-l1d-flush-ori30,30,0"))
+ cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30;
+ if (have_fw_feat(fw_features, "enabled",
+ "inst-l1d-flush-trig2"))
+ cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2;
+ if (have_fw_feat(fw_features, "enabled",
+ "fw-l1d-thread-split"))
+ cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV;
+ if (have_fw_feat(fw_features, "enabled",
+ "fw-count-cache-disabled"))
+ cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+ cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+ KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+ KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+ KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+
+ if (have_fw_feat(fw_features, "enabled",
+ "speculation-policy-favor-security"))
+ cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY;
+ if (!have_fw_feat(fw_features, "disabled",
+ "needs-l1d-flush-msr-pr-0-to-1"))
+ cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR;
+ if (!have_fw_feat(fw_features, "disabled",
+ "needs-spec-barrier-for-bound-checks"))
+ cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+ cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+ KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+ KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+
+ of_node_put(fw_features);
+ }
+
+ return 0;
+}
+#endif
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1861,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+ case KVM_PPC_GET_CPU_CHAR: {
+ struct kvm_ppc_cpu_char cpuchar;
+
+ r = kvmppc_get_cpu_char(&cpuchar);
+ if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar)))
+ r = -EFAULT;
+ break;
+ }
default: {
struct kvm *kvm = filp->private_data;
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index e14f381..c1b0a9a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -207,7 +207,8 @@ struct kvm_s390_sie_block {
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
- __u8 reserved60; /* 0x0060 */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
#define ECB_GS 0x40
#define ECB_TE 0x10
#define ECB_SRSI 0x04
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 38535a57..4cdaa55 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -224,6 +224,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_RICCB (1UL << 7)
#define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9)
+#define KVM_SYNC_BPBC (1UL << 10)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
@@ -247,7 +248,9 @@ struct kvm_sync_regs {
};
__u8 reserved[512]; /* for future vector expansion */
__u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
- __u8 padding1[52]; /* riccb needs to be 64byte aligned */
+ __u8 bpbc : 1; /* bp mode */
+ __u8 reserved2 : 7;
+ __u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
__u8 padding2[192]; /* sdnx needs to be 256byte aligned */
union {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2c93cbb..2598cf243 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -421,6 +421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_GS:
r = test_facility(133);
break;
+ case KVM_CAP_S390_BPB:
+ r = test_facility(82);
+ break;
default:
r = 0;
}
@@ -2198,6 +2201,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ if (test_kvm_facility(vcpu->kvm, 82))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
if (test_kvm_facility(vcpu->kvm, 133))
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
@@ -2339,6 +2344,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
@@ -3298,6 +3304,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
vcpu->arch.gs_enabled = 1;
}
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
+ test_kvm_facility(vcpu->kvm, 82)) {
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
+ }
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
/* save host (userspace) fprs/vrs */
@@ -3344,6 +3355,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
/* Save guest register state */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 5d6ae03..7513483 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -223,6 +223,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
memcpy(scb_o->gcr, scb_s->gcr, 128);
scb_o->pp = scb_s->pp;
+ /* branch prediction */
+ if (test_kvm_facility(vcpu->kvm, 82)) {
+ scb_o->fpf &= ~FPF_BPBC;
+ scb_o->fpf |= scb_s->fpf & FPF_BPBC;
+ }
+
/* interrupt intercept */
switch (scb_s->icptcode) {
case ICPT_PROGI:
@@ -265,6 +271,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->ecb3 = 0;
scb_s->ecd = 0;
scb_s->fac = 0;
+ scb_s->fpf = 0;
rc = prepare_cpuflags(vcpu, vsie_page);
if (rc)
@@ -324,6 +331,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
prefix_unmapped(vsie_page);
scb_s->ecb |= scb_o->ecb & ECB_TE;
}
+ /* branch prediction */
+ if (test_kvm_facility(vcpu->kvm, 82))
+ scb_s->fpf |= scb_o->fpf & FPF_BPBC;
/* SIMD */
if (test_kvm_facility(vcpu->kvm, 129)) {
scb_s->eca |= scb_o->eca & ECA_VX;
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
index 818d3aa..d257186 100644
--- a/arch/sparc/crypto/Makefile
+++ b/arch/sparc/crypto/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
-obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o
obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index aa15b4c..ff6f802 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1264,7 +1264,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
+idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 7b45d84..4ad4108 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -194,6 +194,9 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+extern char __indirect_thunk_start[];
+extern char __indirect_thunk_end[];
+
/*
* On VMEXIT we must ensure that no RSB predictions learned in the guest
* can be followed in the host, by overwriting the RSB completely. Both
@@ -203,16 +206,17 @@ enum spectre_v2_mitigation {
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- unsigned long loops = RSB_CLEAR_LOOPS / 2;
+ unsigned long loops;
asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE("jmp 910f",
__stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
X86_FEATURE_RETPOLINE)
"910:"
- : "=&r" (loops), ASM_CALL_CONSTRAINT
- : "r" (loops) : "memory" );
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
+
#endif /* __ASSEMBLY__ */
#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 31051f3..3de6933 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -88,6 +88,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
+dotraplinkage void do_mce(struct pt_regs *, long);
static inline int get_si_code(unsigned long condition)
{
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 81bb565..7e2baf7 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,10 +29,13 @@ KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
-OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
+ifdef CONFIG_FRAME_POINTER
+OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
+endif
+
# If instrumentation of this dir is enabled, boot hangs during first second.
# Probably could be more selective here, but note that files related to irqs,
# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1d616d..868e412 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1785,6 +1785,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 7cb8ba0..ef61f54 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -8,6 +8,7 @@
#include <asm/ftrace.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
.code64
.section .entry.text, "ax"
@@ -20,7 +21,6 @@ EXPORT_SYMBOL(__fentry__)
EXPORT_SYMBOL(mcount)
#endif
-/* All cases save the original rbp (8 bytes) */
#ifdef CONFIG_FRAME_POINTER
# ifdef CC_USING_FENTRY
/* Save parent and function stack frames (rip and rbp) */
@@ -31,7 +31,7 @@ EXPORT_SYMBOL(mcount)
# endif
#else
/* No need to save a stack frame */
-# define MCOUNT_FRAME_SIZE 8
+# define MCOUNT_FRAME_SIZE 0
#endif /* CONFIG_FRAME_POINTER */
/* Size of stack used to save mcount regs in save_mcount_regs */
@@ -64,10 +64,10 @@ EXPORT_SYMBOL(mcount)
*/
.macro save_mcount_regs added=0
- /* Always save the original rbp */
+#ifdef CONFIG_FRAME_POINTER
+ /* Save the original rbp */
pushq %rbp
-#ifdef CONFIG_FRAME_POINTER
/*
* Stack traces will stop at the ftrace trampoline if the frame pointer
* is not set up properly. If fentry is used, we need to save a frame
@@ -105,7 +105,11 @@ EXPORT_SYMBOL(mcount)
* Save the original RBP. Even though the mcount ABI does not
* require this, it helps out callers.
*/
+#ifdef CONFIG_FRAME_POINTER
movq MCOUNT_REG_SIZE-8(%rsp), %rdx
+#else
+ movq %rbp, %rdx
+#endif
movq %rdx, RBP(%rsp)
/* Copy the parent address into %rsi (second parameter) */
@@ -148,7 +152,7 @@ EXPORT_SYMBOL(mcount)
ENTRY(function_hook)
retq
-END(function_hook)
+ENDPROC(function_hook)
ENTRY(ftrace_caller)
/* save_mcount_regs fills in first two parameters */
@@ -184,7 +188,7 @@ GLOBAL(ftrace_graph_call)
/* This is weak to keep gas from relaxing the jumps */
WEAK(ftrace_stub)
retq
-END(ftrace_caller)
+ENDPROC(ftrace_caller)
ENTRY(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
@@ -255,7 +259,7 @@ GLOBAL(ftrace_regs_caller_end)
jmp ftrace_epilogue
-END(ftrace_regs_caller)
+ENDPROC(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
@@ -313,9 +317,10 @@ ENTRY(ftrace_graph_caller)
restore_mcount_regs
retq
-END(ftrace_graph_caller)
+ENDPROC(ftrace_graph_caller)
-GLOBAL(return_to_handler)
+ENTRY(return_to_handler)
+ UNWIND_HINT_EMPTY
subq $24, %rsp
/* Save the return values */
@@ -330,4 +335,5 @@ GLOBAL(return_to_handler)
movq (%rsp), %rax
addq $24, %rsp
JMP_NOSPEC %rdi
+END(return_to_handler)
#endif
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index e941136..203d3988 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -40,6 +40,7 @@
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
+#include <asm/nospec-branch.h>
#include "common.h"
@@ -203,7 +204,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
}
/* Check whether insn is indirect jump */
-static int insn_is_indirect_jump(struct insn *insn)
+static int __insn_is_indirect_jump(struct insn *insn)
{
return ((insn->opcode.bytes[0] == 0xff &&
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -237,6 +238,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
return (start <= target && target <= start + len);
}
+static int insn_is_indirect_jump(struct insn *insn)
+{
+ int ret = __insn_is_indirect_jump(insn);
+
+#ifdef CONFIG_RETPOLINE
+ /*
+ * Jump to x86_indirect_thunk_* is treated as an indirect jump.
+ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
+ * older gcc may use indirect jump. So we add this check instead of
+ * replace indirect-jump check.
+ */
+ if (!ret)
+ ret = insn_jump_into_range(insn,
+ (unsigned long)__indirect_thunk_start,
+ (unsigned long)__indirect_thunk_end -
+ (unsigned long)__indirect_thunk_start);
+#endif
+ return ret;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 832a6ac..cb368c2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -380,19 +380,24 @@ void stop_this_cpu(void *dummy)
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
+ /*
+ * Use wbinvd on processors that support SME. This provides support
+ * for performing a successful kexec when going from SME inactive
+ * to SME active (or vice-versa). The cache must be cleared so that
+ * if there are entries with the same physical address, both with and
+ * without the encryption bit, they don't race each other when flushed
+ * and potentially end up with the wrong entry being committed to
+ * memory.
+ */
+ if (boot_cpu_has(X86_FEATURE_SME))
+ native_wbinvd();
for (;;) {
/*
- * Use wbinvd followed by hlt to stop the processor. This
- * provides support for kexec on a processor that supports
- * SME. With kexec, going from SME inactive to SME active
- * requires clearing cache entries so that addresses without
- * the encryption bit set don't corrupt the same physical
- * address that has the encryption bit set when caches are
- * flushed. To achieve this a wbinvd is performed followed by
- * a hlt. Even if the processor is not in the kexec/SME
- * scenario this only adds a wbinvd to a halting processor.
+ * Use native_halt() so that memory contents don't change
+ * (stack usage and variables) after possibly issuing the
+ * native_wbinvd() above.
*/
- asm volatile("wbinvd; hlt" : : : "memory");
+ native_halt();
}
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index be86a86..1f9188f 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -74,8 +74,50 @@ static struct orc_entry *orc_module_find(unsigned long ip)
}
#endif
+#ifdef CONFIG_DYNAMIC_FTRACE
+static struct orc_entry *orc_find(unsigned long ip);
+
+/*
+ * Ftrace dynamic trampolines do not have orc entries of their own.
+ * But they are copies of the ftrace entries that are static and
+ * defined in ftrace_*.S, which do have orc entries.
+ *
+ * If the undwinder comes across a ftrace trampoline, then find the
+ * ftrace function that was used to create it, and use that ftrace
+ * function's orc entrie, as the placement of the return code in
+ * the stack will be identical.
+ */
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+ struct ftrace_ops *ops;
+ unsigned long caller;
+
+ ops = ftrace_ops_trampoline(ip);
+ if (!ops)
+ return NULL;
+
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+ caller = (unsigned long)ftrace_regs_call;
+ else
+ caller = (unsigned long)ftrace_call;
+
+ /* Prevent unlikely recursion */
+ if (ip == caller)
+ return NULL;
+
+ return orc_find(caller);
+}
+#else
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+ return NULL;
+}
+#endif
+
static struct orc_entry *orc_find(unsigned long ip)
{
+ static struct orc_entry *orc;
+
if (!orc_init)
return NULL;
@@ -111,7 +153,11 @@ static struct orc_entry *orc_find(unsigned long ip)
__stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
/* Module lookup: */
- return orc_module_find(ip);
+ orc = orc_module_find(ip);
+ if (orc)
+ return orc;
+
+ return orc_ftrace_find(ip);
}
static void orc_sort_swap(void *_a, void *_b, int size)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1e413a93..9b138a0 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -124,6 +124,12 @@ SECTIONS
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif
+#ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+ *(.text.__x86.indirect_thunk)
+ __indirect_thunk_end = .;
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1cec2c6..c53298d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7496,13 +7496,13 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
- if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
+ if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
/*
* When EFER.LME and CR0.PG are set, the processor is in
* 64-bit mode (though maybe in a 32-bit code segment).
* CR4.PAE and EFER.LMA must be set.
*/
- if (!(sregs->cr4 & X86_CR4_PAE_BIT)
+ if (!(sregs->cr4 & X86_CR4_PAE)
|| !(sregs->efer & EFER_LMA))
return -EINVAL;
} else {
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index cb45c6c..dfb2ba9 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -9,7 +9,7 @@
#include <asm/nospec-branch.h>
.macro THUNK reg
- .section .text.__x86.indirect_thunk.\reg
+ .section .text.__x86.indirect_thunk
ENTRY(__x86_indirect_thunk_\reg)
CFI_STARTPROC
@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg)
* than one per register with the correct names. So we do it
* the simple and nasty way...
*/
-#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
GENERATE_THUNK(_ASM_AX)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3ef362f..e1d61e8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -738,7 +738,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return total;
}
-void __init sme_encrypt_kernel(struct boot_params *bp)
+void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index f6a26e3..54ef19e 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -662,11 +662,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
*/
static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
{
+ static const char *name = "PCI Bus 0000:00";
+ struct resource *res, *conflict;
u32 base, limit, high;
struct pci_dev *other;
- struct resource *res;
unsigned i;
- int r;
if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
return;
@@ -707,21 +707,26 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
* Allocate a 256GB window directly below the 0xfd00000000 hardware
* limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
*/
- res->name = "PCI Bus 0000:00";
+ res->name = name;
res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
res->start = 0xbd00000000ull;
res->end = 0xfd00000000ull - 1;
- r = request_resource(&iomem_resource, res);
- if (r) {
+ conflict = request_resource_conflict(&iomem_resource, res);
+ if (conflict) {
kfree(res);
- return;
- }
+ if (conflict->name != name)
+ return;
- dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
- res);
- add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ /* We are resuming from suspend; just reenable the window */
+ res = conflict;
+ } else {
+ dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+ res);
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ pci_bus_add_resource(dev->bus, res, 0);
+ }
base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
@@ -733,13 +738,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
pci_write_config_dword(dev, AMD_141b_MMIO_HIGH(i), high);
pci_write_config_dword(dev, AMD_141b_MMIO_LIMIT(i), limit);
pci_write_config_dword(dev, AMD_141b_MMIO_BASE(i), base);
-
- pci_bus_add_resource(dev->bus, res, 0);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
#endif
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index afa4cb3..6659f11 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -323,6 +323,7 @@ static const struct {
{ 0x410e, "BCM43341B0" }, /* 002.001.014 */
{ 0x4406, "BCM4324B3" }, /* 002.004.006 */
{ 0x610c, "BCM4354" }, /* 003.001.012 */
+ { 0x2122, "BCM4343A0" }, /* 001.001.034 */
{ 0x2209, "BCM43430A1" }, /* 001.002.009 */
{ 0x6119, "BCM4345C0" }, /* 003.001.025 */
{ 0x230f, "BCM4356A2" }, /* 001.003.015 */
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 07f00e4..5270d55 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -24,6 +24,7 @@
#include <linux/module.h>
#include <linux/firmware.h>
#include <linux/regmap.h>
+#include <asm/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -569,6 +570,160 @@ struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
}
EXPORT_SYMBOL_GPL(btintel_regmap_init);
+int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param)
+{
+ struct intel_reset params = { 0x00, 0x01, 0x00, 0x01, 0x00000000 };
+ struct sk_buff *skb;
+
+ params.boot_param = cpu_to_le32(boot_param);
+
+ skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(params), &params,
+ HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Failed to send Intel Reset command");
+ return PTR_ERR(skb);
+ }
+
+ kfree_skb(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btintel_send_intel_reset);
+
+int btintel_read_boot_params(struct hci_dev *hdev,
+ struct intel_boot_params *params)
+{
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync(hdev, 0xfc0d, 0, NULL, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Reading Intel boot parameters failed (%ld)",
+ PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ if (skb->len != sizeof(*params)) {
+ bt_dev_err(hdev, "Intel boot parameters size mismatch");
+ kfree_skb(skb);
+ return -EILSEQ;
+ }
+
+ memcpy(params, skb->data, sizeof(*params));
+
+ kfree_skb(skb);
+
+ if (params->status) {
+ bt_dev_err(hdev, "Intel boot parameters command failed (%02x)",
+ params->status);
+ return -bt_to_errno(params->status);
+ }
+
+ bt_dev_info(hdev, "Device revision is %u",
+ le16_to_cpu(params->dev_revid));
+
+ bt_dev_info(hdev, "Secure boot is %s",
+ params->secure_boot ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "OTP lock is %s",
+ params->otp_lock ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "API lock is %s",
+ params->api_lock ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "Debug lock is %s",
+ params->debug_lock ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "Minimum firmware build %u week %u %u",
+ params->min_fw_build_nn, params->min_fw_build_cw,
+ 2000 + params->min_fw_build_yy);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btintel_read_boot_params);
+
+int btintel_download_firmware(struct hci_dev *hdev, const struct firmware *fw,
+ u32 *boot_param)
+{
+ int err;
+ const u8 *fw_ptr;
+ u32 frag_len;
+
+ /* Start the firmware download transaction with the Init fragment
+ * represented by the 128 bytes of CSS header.
+ */
+ err = btintel_secure_send(hdev, 0x00, 128, fw->data);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send firmware header (%d)", err);
+ goto done;
+ }
+
+ /* Send the 256 bytes of public key information from the firmware
+ * as the PKey fragment.
+ */
+ err = btintel_secure_send(hdev, 0x03, 256, fw->data + 128);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send firmware pkey (%d)", err);
+ goto done;
+ }
+
+ /* Send the 256 bytes of signature information from the firmware
+ * as the Sign fragment.
+ */
+ err = btintel_secure_send(hdev, 0x02, 256, fw->data + 388);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send firmware signature (%d)", err);
+ goto done;
+ }
+
+ fw_ptr = fw->data + 644;
+ frag_len = 0;
+
+ while (fw_ptr - fw->data < fw->size) {
+ struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
+
+ /* Each SKU has a different reset parameter to use in the
+ * HCI_Intel_Reset command and it is embedded in the firmware
+ * data. So, instead of using static value per SKU, check
+ * the firmware data and save it for later use.
+ */
+ if (le16_to_cpu(cmd->opcode) == 0xfc0e) {
+ /* The boot parameter is the first 32-bit value
+ * and rest of 3 octets are reserved.
+ */
+ *boot_param = get_unaligned_le32(fw_ptr + sizeof(*cmd));
+
+ bt_dev_dbg(hdev, "boot_param=0x%x", *boot_param);
+ }
+
+ frag_len += sizeof(*cmd) + cmd->plen;
+
+ /* The parameter length of the secure send command requires
+ * a 4 byte alignment. It happens so that the firmware file
+ * contains proper Intel_NOP commands to align the fragments
+ * as needed.
+ *
+ * Send set of commands with 4 byte alignment from the
+ * firmware data buffer as a single Data fragement.
+ */
+ if (!(frag_len % 4)) {
+ err = btintel_secure_send(hdev, 0x01, frag_len, fw_ptr);
+ if (err < 0) {
+ bt_dev_err(hdev,
+ "Failed to send firmware data (%d)",
+ err);
+ goto done;
+ }
+
+ fw_ptr += frag_len;
+ frag_len = 0;
+ }
+ }
+
+done:
+ return err;
+}
+EXPORT_SYMBOL_GPL(btintel_download_firmware);
+
MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION);
MODULE_VERSION(VERSION);
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index 1e8955a..41c642c 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -69,6 +69,14 @@ struct intel_secure_send_result {
__u8 status;
} __packed;
+struct intel_reset {
+ __u8 reset_type;
+ __u8 patch_enable;
+ __u8 ddc_reload;
+ __u8 boot_option;
+ __le32 boot_param;
+} __packed;
+
#if IS_ENABLED(CONFIG_BT_INTEL)
int btintel_check_bdaddr(struct hci_dev *hdev);
@@ -89,7 +97,11 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver);
struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
u16 opcode_write);
-
+int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param);
+int btintel_read_boot_params(struct hci_dev *hdev,
+ struct intel_boot_params *params);
+int btintel_download_firmware(struct hci_dev *dev, const struct firmware *fw,
+ u32 *boot_param);
#else
static inline int btintel_check_bdaddr(struct hci_dev *hdev)
@@ -165,4 +177,23 @@ static inline struct regmap *btintel_regmap_init(struct hci_dev *hdev,
{
return ERR_PTR(-EINVAL);
}
+
+static inline int btintel_send_intel_reset(struct hci_dev *hdev,
+ u32 reset_param)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int btintel_read_boot_params(struct hci_dev *hdev,
+ struct intel_boot_params *params)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int btintel_download_firmware(struct hci_dev *dev,
+ const struct firmware *fw,
+ u32 *boot_param)
+{
+ return -EOPNOTSUPP;
+}
#endif
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 29977eb..2a55380 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2009,15 +2009,11 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb)
static int btusb_setup_intel_new(struct hci_dev *hdev)
{
- static const u8 reset_param[] = { 0x00, 0x01, 0x00, 0x01,
- 0x00, 0x08, 0x04, 0x00 };
struct btusb_data *data = hci_get_drvdata(hdev);
- struct sk_buff *skb;
struct intel_version ver;
- struct intel_boot_params *params;
+ struct intel_boot_params params;
const struct firmware *fw;
- const u8 *fw_ptr;
- u32 frag_len;
+ u32 boot_param;
char fwname[64];
ktime_t calltime, delta, rettime;
unsigned long long duration;
@@ -2025,6 +2021,12 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
BT_DBG("%s", hdev->name);
+ /* Set the default boot parameter to 0x0 and it is updated to
+ * SKU specific boot parameter after reading Intel_Write_Boot_Params
+ * command while downloading the firmware.
+ */
+ boot_param = 0x00000000;
+
calltime = ktime_get();
/* Read the Intel version information to determine if the device
@@ -2095,55 +2097,24 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
/* Read the secure boot parameters to identify the operating
* details of the bootloader.
*/
- skb = __hci_cmd_sync(hdev, 0xfc0d, 0, NULL, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- BT_ERR("%s: Reading Intel boot parameters failed (%ld)",
- hdev->name, PTR_ERR(skb));
- return PTR_ERR(skb);
- }
-
- if (skb->len != sizeof(*params)) {
- BT_ERR("%s: Intel boot parameters size mismatch", hdev->name);
- kfree_skb(skb);
- return -EILSEQ;
- }
-
- params = (struct intel_boot_params *)skb->data;
-
- bt_dev_info(hdev, "Device revision is %u",
- le16_to_cpu(params->dev_revid));
-
- bt_dev_info(hdev, "Secure boot is %s",
- params->secure_boot ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "OTP lock is %s",
- params->otp_lock ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "API lock is %s",
- params->api_lock ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "Debug lock is %s",
- params->debug_lock ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "Minimum firmware build %u week %u %u",
- params->min_fw_build_nn, params->min_fw_build_cw,
- 2000 + params->min_fw_build_yy);
+ err = btintel_read_boot_params(hdev, &params);
+ if (err)
+ return err;
/* It is required that every single firmware fragment is acknowledged
* with a command complete event. If the boot parameters indicate
* that this bootloader does not send them, then abort the setup.
*/
- if (params->limited_cce != 0x00) {
+ if (params.limited_cce != 0x00) {
BT_ERR("%s: Unsupported Intel firmware loading method (%u)",
- hdev->name, params->limited_cce);
- kfree_skb(skb);
+ hdev->name, params.limited_cce);
return -EINVAL;
}
/* If the OTP has no valid Bluetooth device address, then there will
* also be no valid address for the operational firmware.
*/
- if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
+ if (!bacmp(&params.otp_bdaddr, BDADDR_ANY)) {
bt_dev_info(hdev, "No device address configured");
set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
}
@@ -2174,7 +2145,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
case 0x0c: /* WsP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi",
le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
+ le16_to_cpu(params.dev_revid));
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
@@ -2192,7 +2163,6 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
if (err < 0) {
BT_ERR("%s: Failed to load Intel firmware file (%d)",
hdev->name, err);
- kfree_skb(skb);
return err;
}
@@ -2206,7 +2176,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
case 0x0c: /* WsP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc",
le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
+ le16_to_cpu(params.dev_revid));
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
@@ -2220,8 +2190,6 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
return -EINVAL;
}
- kfree_skb(skb);
-
if (fw->size < 644) {
BT_ERR("%s: Invalid size of firmware file (%zu)",
hdev->name, fw->size);
@@ -2231,64 +2199,10 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
set_bit(BTUSB_DOWNLOADING, &data->flags);
- /* Start the firmware download transaction with the Init fragment
- * represented by the 128 bytes of CSS header.
- */
- err = btintel_secure_send(hdev, 0x00, 128, fw->data);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware header (%d)",
- hdev->name, err);
- goto done;
- }
-
- /* Send the 256 bytes of public key information from the firmware
- * as the PKey fragment.
- */
- err = btintel_secure_send(hdev, 0x03, 256, fw->data + 128);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware public key (%d)",
- hdev->name, err);
- goto done;
- }
-
- /* Send the 256 bytes of signature information from the firmware
- * as the Sign fragment.
- */
- err = btintel_secure_send(hdev, 0x02, 256, fw->data + 388);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware signature (%d)",
- hdev->name, err);
+ /* Start firmware downloading and get boot parameter */
+ err = btintel_download_firmware(hdev, fw, &boot_param);
+ if (err < 0)
goto done;
- }
-
- fw_ptr = fw->data + 644;
- frag_len = 0;
-
- while (fw_ptr - fw->data < fw->size) {
- struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
-
- frag_len += sizeof(*cmd) + cmd->plen;
-
- /* The parameter length of the secure send command requires
- * a 4 byte alignment. It happens so that the firmware file
- * contains proper Intel_NOP commands to align the fragments
- * as needed.
- *
- * Send set of commands with 4 byte alignment from the
- * firmware data buffer as a single Data fragement.
- */
- if (!(frag_len % 4)) {
- err = btintel_secure_send(hdev, 0x01, frag_len, fw_ptr);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware data (%d)",
- hdev->name, err);
- goto done;
- }
-
- fw_ptr += frag_len;
- frag_len = 0;
- }
- }
set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
@@ -2341,12 +2255,9 @@ done:
set_bit(BTUSB_BOOTING, &data->flags);
- skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(reset_param), reset_param,
- HCI_INIT_TIMEOUT);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- kfree_skb(skb);
+ err = btintel_send_intel_reset(hdev, boot_param);
+ if (err)
+ return err;
/* The bootloader will not indicate when the device is ready. This
* is done by the operational firmware sending bootup notification.
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 64800cd..0438a64 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -412,8 +412,11 @@ out:
return 0;
err_unset_hu:
+ if (hu->serdev)
+ serdev_device_close(hu->serdev);
#ifdef CONFIG_PM
- bcm->dev->hu = NULL;
+ else
+ bcm->dev->hu = NULL;
#endif
err_free:
mutex_unlock(&bcm_device_lock);
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index aad07e4..7c166e3 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -540,18 +540,15 @@ static int intel_set_baudrate(struct hci_uart *hu, unsigned int speed)
static int intel_setup(struct hci_uart *hu)
{
- static const u8 reset_param[] = { 0x00, 0x01, 0x00, 0x01,
- 0x00, 0x08, 0x04, 0x00 };
struct intel_data *intel = hu->priv;
struct hci_dev *hdev = hu->hdev;
struct sk_buff *skb;
struct intel_version ver;
- struct intel_boot_params *params;
+ struct intel_boot_params params;
struct list_head *p;
const struct firmware *fw;
- const u8 *fw_ptr;
char fwname[64];
- u32 frag_len;
+ u32 boot_param;
ktime_t calltime, delta, rettime;
unsigned long long duration;
unsigned int init_speed, oper_speed;
@@ -563,6 +560,12 @@ static int intel_setup(struct hci_uart *hu)
hu->hdev->set_diag = btintel_set_diag;
hu->hdev->set_bdaddr = btintel_set_bdaddr;
+ /* Set the default boot parameter to 0x0 and it is updated to
+ * SKU specific boot parameter after reading Intel_Write_Boot_Params
+ * command while downloading the firmware.
+ */
+ boot_param = 0x00000000;
+
calltime = ktime_get();
if (hu->init_speed)
@@ -656,85 +659,95 @@ static int intel_setup(struct hci_uart *hu)
/* Read the secure boot parameters to identify the operating
* details of the bootloader.
*/
- skb = __hci_cmd_sync(hdev, 0xfc0d, 0, NULL, HCI_CMD_TIMEOUT);
- if (IS_ERR(skb)) {
- bt_dev_err(hdev, "Reading Intel boot parameters failed (%ld)",
- PTR_ERR(skb));
- return PTR_ERR(skb);
- }
-
- if (skb->len != sizeof(*params)) {
- bt_dev_err(hdev, "Intel boot parameters size mismatch");
- kfree_skb(skb);
- return -EILSEQ;
- }
-
- params = (struct intel_boot_params *)skb->data;
- if (params->status) {
- bt_dev_err(hdev, "Intel boot parameters command failure (%02x)",
- params->status);
- err = -bt_to_errno(params->status);
- kfree_skb(skb);
+ err = btintel_read_boot_params(hdev, &params);
+ if (err)
return err;
- }
-
- bt_dev_info(hdev, "Device revision is %u",
- le16_to_cpu(params->dev_revid));
-
- bt_dev_info(hdev, "Secure boot is %s",
- params->secure_boot ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "Minimum firmware build %u week %u %u",
- params->min_fw_build_nn, params->min_fw_build_cw,
- 2000 + params->min_fw_build_yy);
/* It is required that every single firmware fragment is acknowledged
* with a command complete event. If the boot parameters indicate
* that this bootloader does not send them, then abort the setup.
*/
- if (params->limited_cce != 0x00) {
+ if (params.limited_cce != 0x00) {
bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)",
- params->limited_cce);
- kfree_skb(skb);
+ params.limited_cce);
return -EINVAL;
}
/* If the OTP has no valid Bluetooth device address, then there will
* also be no valid address for the operational firmware.
*/
- if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
+ if (!bacmp(&params.otp_bdaddr, BDADDR_ANY)) {
bt_dev_info(hdev, "No device address configured");
set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
}
/* With this Intel bootloader only the hardware variant and device
- * revision information are used to select the right firmware.
+ * revision information are used to select the right firmware for SfP
+ * and WsP.
*
* The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
*
* Currently the supported hardware variants are:
* 11 (0x0b) for iBT 3.0 (LnP/SfP)
+ * 12 (0x0c) for iBT 3.5 (WsP)
+ *
+ * For ThP/JfP and for future SKU's, the FW name varies based on HW
+ * variant, HW revision and FW revision, as these are dependent on CNVi
+ * and RF Combination.
+ *
+ * 18 (0x12) for iBT3.5 (ThP/JfP)
+ *
+ * The firmware file name for these will be
+ * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi.
+ *
*/
- snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi",
- le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
+ switch (ver.hw_variant) {
+ case 0x0b: /* SfP */
+ case 0x0c: /* WsP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(params.dev_revid));
+ break;
+ case 0x12: /* ThP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(ver.hw_revision),
+ le16_to_cpu(ver.fw_revision));
+ break;
+ default:
+ bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)",
+ ver.hw_variant);
+ return -EINVAL;
+ }
err = request_firmware(&fw, fwname, &hdev->dev);
if (err < 0) {
bt_dev_err(hdev, "Failed to load Intel firmware file (%d)",
err);
- kfree_skb(skb);
return err;
}
bt_dev_info(hdev, "Found device firmware: %s", fwname);
/* Save the DDC file name for later */
- snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc",
- le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
-
- kfree_skb(skb);
+ switch (ver.hw_variant) {
+ case 0x0b: /* SfP */
+ case 0x0c: /* WsP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(params.dev_revid));
+ break;
+ case 0x12: /* ThP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(ver.hw_revision),
+ le16_to_cpu(ver.fw_revision));
+ break;
+ default:
+ bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)",
+ ver.hw_variant);
+ return -EINVAL;
+ }
if (fw->size < 644) {
bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
@@ -745,70 +758,10 @@ static int intel_setup(struct hci_uart *hu)
set_bit(STATE_DOWNLOADING, &intel->flags);
- /* Start the firmware download transaction with the Init fragment
- * represented by the 128 bytes of CSS header.
- */
- err = btintel_secure_send(hdev, 0x00, 128, fw->data);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware header (%d)", err);
- goto done;
- }
-
- /* Send the 256 bytes of public key information from the firmware
- * as the PKey fragment.
- */
- err = btintel_secure_send(hdev, 0x03, 256, fw->data + 128);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware public key (%d)",
- err);
- goto done;
- }
-
- /* Send the 256 bytes of signature information from the firmware
- * as the Sign fragment.
- */
- err = btintel_secure_send(hdev, 0x02, 256, fw->data + 388);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware signature (%d)",
- err);
+ /* Start firmware downloading and get boot parameter */
+ err = btintel_download_firmware(hdev, fw, &boot_param);
+ if (err < 0)
goto done;
- }
-
- fw_ptr = fw->data + 644;
- frag_len = 0;
-
- while (fw_ptr - fw->data < fw->size) {
- struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
-
- frag_len += sizeof(*cmd) + cmd->plen;
-
- bt_dev_dbg(hdev, "Patching %td/%zu", (fw_ptr - fw->data),
- fw->size);
-
- /* The parameter length of the secure send command requires
- * a 4 byte alignment. It happens so that the firmware file
- * contains proper Intel_NOP commands to align the fragments
- * as needed.
- *
- * Send set of commands with 4 byte alignment from the
- * firmware data buffer as a single Data fragement.
- */
- if (frag_len % 4)
- continue;
-
- /* Send each command from the firmware data buffer as
- * a single Data fragment.
- */
- err = btintel_secure_send(hdev, 0x01, frag_len, fw_ptr);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware data (%d)",
- err);
- goto done;
- }
-
- fw_ptr += frag_len;
- frag_len = 0;
- }
set_bit(STATE_FIRMWARE_LOADED, &intel->flags);
@@ -869,12 +822,9 @@ done:
set_bit(STATE_BOOTING, &intel->flags);
- skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(reset_param), reset_param,
- HCI_CMD_TIMEOUT);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- kfree_skb(skb);
+ err = btintel_send_intel_reset(hdev, boot_param);
+ if (err)
+ return err;
/* The bootloader will not indicate when the device is ready. This
* is done by the operational firmware sending bootup notification.
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9fc12f5..554d603 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1954,10 +1954,15 @@ static int crypt_setkey(struct crypt_config *cc)
/* Ignore extra keys (which are used for IV etc) */
subkey_size = crypt_subkey_size(cc);
- if (crypt_integrity_hmac(cc))
+ if (crypt_integrity_hmac(cc)) {
+ if (subkey_size < cc->key_mac_size)
+ return -EINVAL;
+
crypt_copy_authenckey(cc->authenc_key, cc->key,
subkey_size - cc->key_mac_size,
cc->key_mac_size);
+ }
+
for (i = 0; i < cc->tfms_count; i++) {
if (crypt_integrity_hmac(cc))
r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i],
@@ -2053,9 +2058,6 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
ret = crypt_setkey(cc);
- /* wipe the kernel key payload copy in each case */
- memset(cc->key, 0, cc->key_size * sizeof(u8));
-
if (!ret) {
set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
kzfree(cc->key_string);
@@ -2523,6 +2525,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key)
}
}
+ /* wipe the kernel key payload copy */
+ if (cc->key_string)
+ memset(cc->key, 0, cc->key_size * sizeof(u8));
+
return ret;
}
@@ -2740,6 +2746,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->tag_pool_max_sectors * cc->on_disk_tag_size);
if (!cc->tag_pool) {
ti->error = "Cannot allocate integrity tags mempool";
+ ret = -ENOMEM;
goto bad;
}
@@ -2961,6 +2968,9 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
return ret;
if (cc->iv_gen_ops && cc->iv_gen_ops->init)
ret = cc->iv_gen_ops->init(cc);
+ /* wipe the kernel key payload copy */
+ if (cc->key_string)
+ memset(cc->key, 0, cc->key_size * sizeof(u8));
return ret;
}
if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
@@ -3007,7 +3017,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 18, 0},
+ .version = {1, 18, 1},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 05c7bfd..46d7c87 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2559,7 +2559,8 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
int r = 0;
unsigned i;
__u64 journal_pages, journal_desc_size, journal_tree_size;
- unsigned char *crypt_data = NULL;
+ unsigned char *crypt_data = NULL, *crypt_iv = NULL;
+ struct skcipher_request *req = NULL;
ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL);
ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL);
@@ -2617,9 +2618,20 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
if (blocksize == 1) {
struct scatterlist *sg;
- SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt);
- unsigned char iv[ivsize];
- skcipher_request_set_tfm(req, ic->journal_crypt);
+
+ req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
+ if (!req) {
+ *error = "Could not allocate crypt request";
+ r = -ENOMEM;
+ goto bad;
+ }
+
+ crypt_iv = kmalloc(ivsize, GFP_KERNEL);
+ if (!crypt_iv) {
+ *error = "Could not allocate iv";
+ r = -ENOMEM;
+ goto bad;
+ }
ic->journal_xor = dm_integrity_alloc_page_list(ic);
if (!ic->journal_xor) {
@@ -2641,9 +2653,9 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
sg_set_buf(&sg[i], va, PAGE_SIZE);
}
sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
- memset(iv, 0x00, ivsize);
+ memset(crypt_iv, 0x00, ivsize);
- skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv);
+ skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
init_completion(&comp.comp);
comp.in_flight = (atomic_t)ATOMIC_INIT(1);
if (do_crypt(true, req, &comp))
@@ -2659,10 +2671,22 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
crypto_free_skcipher(ic->journal_crypt);
ic->journal_crypt = NULL;
} else {
- SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt);
- unsigned char iv[ivsize];
unsigned crypt_len = roundup(ivsize, blocksize);
+ req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
+ if (!req) {
+ *error = "Could not allocate crypt request";
+ r = -ENOMEM;
+ goto bad;
+ }
+
+ crypt_iv = kmalloc(ivsize, GFP_KERNEL);
+ if (!crypt_iv) {
+ *error = "Could not allocate iv";
+ r = -ENOMEM;
+ goto bad;
+ }
+
crypt_data = kmalloc(crypt_len, GFP_KERNEL);
if (!crypt_data) {
*error = "Unable to allocate crypt data";
@@ -2670,8 +2694,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
goto bad;
}
- skcipher_request_set_tfm(req, ic->journal_crypt);
-
ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal);
if (!ic->journal_scatterlist) {
*error = "Unable to allocate sg list";
@@ -2695,12 +2717,12 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
struct skcipher_request *section_req;
__u32 section_le = cpu_to_le32(i);
- memset(iv, 0x00, ivsize);
+ memset(crypt_iv, 0x00, ivsize);
memset(crypt_data, 0x00, crypt_len);
memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le)));
sg_init_one(&sg, crypt_data, crypt_len);
- skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv);
+ skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv);
init_completion(&comp.comp);
comp.in_flight = (atomic_t)ATOMIC_INIT(1);
if (do_crypt(true, req, &comp))
@@ -2758,6 +2780,9 @@ retest_commit_id:
}
bad:
kfree(crypt_data);
+ kfree(crypt_iv);
+ skcipher_request_free(req);
+
return r;
}
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index d31d18d..36ef284 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -80,10 +80,14 @@
#define SECTOR_TO_BLOCK_SHIFT 3
/*
+ * For btree insert:
* 3 for btree insert +
* 2 for btree lookup used within space map
+ * For btree remove:
+ * 2 for shadow spine +
+ * 4 for rebalance 3 child node
*/
-#define THIN_MAX_CONCURRENT_LOCKS 5
+#define THIN_MAX_CONCURRENT_LOCKS 6
/* This should be plenty */
#define SPACE_MAP_ROOT_SIZE 128
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index f21ce6a..58b3197 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -683,23 +683,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
pn->keys[1] = rn->keys[0];
memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));
- /*
- * rejig the spine. This is ugly, since it knows too
- * much about the spine
- */
- if (s->nodes[0] != new_parent) {
- unlock_block(s->info, s->nodes[0]);
- s->nodes[0] = new_parent;
- }
- if (key < le64_to_cpu(rn->keys[0])) {
- unlock_block(s->info, right);
- s->nodes[1] = left;
- } else {
- unlock_block(s->info, left);
- s->nodes[1] = right;
- }
- s->count = 2;
-
+ unlock_block(s->info, left);
+ unlock_block(s->info, right);
return 0;
}
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index cc94604..b177956 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -412,7 +412,7 @@ EXPORT_SYMBOL_GPL(can_change_state);
* Local echo of CAN messages
*
* CAN network devices *should* support a local echo functionality
- * (see Documentation/networking/can.txt). To test the handling of CAN
+ * (see Documentation/networking/can.rst). To test the handling of CAN
* interfaces that do not support the local echo both driver types are
* implemented. In the case that the driver does not support the echo
* the IFF_ECHO remains clear in dev->flags. This causes the PF_CAN core
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index a8cb332..c2b04f5 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -61,7 +61,7 @@ MODULE_ALIAS_RTNL_LINK(DRV_NAME);
/*
* CAN test feature:
* Enable the echo on driver level for testing the CAN core echo modes.
- * See Documentation/networking/can.txt for details.
+ * See Documentation/networking/can.rst for details.
*/
static bool echo; /* echo testing. Default: 0 (Off) */
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 7919f61..5e34b34 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -5818,8 +5818,8 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
struct l2_fhdr *rx_hdr;
int ret = -ENODEV;
struct bnx2_napi *bnapi = &bp->bnx2_napi[0], *tx_napi;
- struct bnx2_tx_ring_info *txr = &bnapi->tx_ring;
- struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring;
+ struct bnx2_tx_ring_info *txr;
+ struct bnx2_rx_ring_info *rxr;
tx_napi = bnapi;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6b7e996..4b001d2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7778,7 +7778,8 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct bnxt *bp = cb_priv;
- if (!bnxt_tc_flower_enabled(bp) || !tc_can_offload(bp->dev))
+ if (!bnxt_tc_flower_enabled(bp) ||
+ !tc_cls_can_offload_and_chain0(bp->dev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 2ece164..fbe6e20 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -1474,9 +1474,6 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
{
int rc = 0;
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index 2ca11be..2629040 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -124,7 +124,8 @@ static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type,
struct bnxt *bp = vf_rep->bp;
int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid;
- if (!bnxt_tc_flower_enabled(vf_rep->bp) || !tc_can_offload(bp->dev))
+ if (!bnxt_tc_flower_enabled(vf_rep->bp) ||
+ !tc_cls_can_offload_and_chain0(bp->dev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index 5713e83..e2cdfa7 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -69,6 +69,7 @@ config CHELSIO_T4
depends on PCI && (IPV6 || IPV6=n)
select FW_LOADER
select MDIO
+ select ZLIB_DEFLATE
---help---
This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet
adapter and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 5df9237..53b6a02 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -8,8 +8,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
- cudbg_common.o cudbg_lib.o
+ cudbg_common.o cudbg_lib.o cudbg_zlib.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o
cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o
cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
-cxgb4-$(CONFIG_ZLIB_DEFLATE) += cudbg_zlib.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 8b95117..557fd8b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -1567,6 +1567,12 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init,
tid1->ver_hdr.size = sizeof(struct cudbg_tid_info_region_rev1) -
sizeof(struct cudbg_ver_hdr);
+ /* If firmware is not attached/alive, use backdoor register
+ * access to collect dump.
+ */
+ if (!is_fw_attached(pdbg_init))
+ goto fill_tid;
+
#define FW_PARAM_PFVF_A(param) \
(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \
FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \
@@ -1604,6 +1610,9 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init,
tid->nhpftids = val[1] - val[0] + 1;
}
+#undef FW_PARAM_PFVF_A
+
+fill_tid:
tid->ntids = padap->tids.ntids;
tid->nstids = padap->tids.nstids;
tid->stid_base = padap->tids.stid_base;
@@ -1623,8 +1632,6 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init,
tid->ip_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV4_A);
tid->ipv6_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV6_A);
-#undef FW_PARAM_PFVF_A
-
return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
}
@@ -1866,11 +1873,18 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init,
max_ctx_size = region_info[i].end - region_info[i].start + 1;
max_ctx_qid = max_ctx_size / SGE_CTXT_SIZE;
- t4_sge_ctxt_flush(padap, padap->mbox, i);
- rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type[i],
- region_info[i].start, max_ctx_size,
- (__be32 *)ctx_buf, 1);
- if (rc) {
+ /* If firmware is not attached/alive, use backdoor register
+ * access to collect dump.
+ */
+ if (is_fw_attached(pdbg_init)) {
+ t4_sge_ctxt_flush(padap, padap->mbox, i);
+
+ rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type[i],
+ region_info[i].start, max_ctx_size,
+ (__be32 *)ctx_buf, 1);
+ }
+
+ if (rc || !is_fw_attached(pdbg_init)) {
max_ctx_qid = CUDBG_LOWMEM_MAX_CTXT_QIDS;
cudbg_get_sge_ctxt_fw(pdbg_init, max_ctx_qid, i,
&buff);
@@ -1946,9 +1960,10 @@ static void cudbg_mps_rpl_backdoor(struct adapter *padap,
mps_rplc->rplc31_0 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP0_A));
}
-static int cudbg_collect_tcam_index(struct adapter *padap,
+static int cudbg_collect_tcam_index(struct cudbg_init *pdbg_init,
struct cudbg_mps_tcam *tcam, u32 idx)
{
+ struct adapter *padap = pdbg_init->adap;
u64 tcamy, tcamx, val;
u32 ctl, data2;
int rc = 0;
@@ -2033,12 +2048,22 @@ static int cudbg_collect_tcam_index(struct adapter *padap,
htons(FW_LDST_CMD_FID_V(FW_LDST_MPS_RPLC) |
FW_LDST_CMD_IDX_V(idx));
- rc = t4_wr_mbox(padap, padap->mbox, &ldst_cmd, sizeof(ldst_cmd),
- &ldst_cmd);
- if (rc)
+ /* If firmware is not attached/alive, use backdoor register
+ * access to collect dump.
+ */
+ if (is_fw_attached(pdbg_init))
+ rc = t4_wr_mbox(padap, padap->mbox, &ldst_cmd,
+ sizeof(ldst_cmd), &ldst_cmd);
+
+ if (rc || !is_fw_attached(pdbg_init)) {
cudbg_mps_rpl_backdoor(padap, &mps_rplc);
- else
+ /* Ignore error since we collected directly from
+ * reading registers.
+ */
+ rc = 0;
+ } else {
mps_rplc = ldst_cmd.u.mps.rplc;
+ }
tcam->rplc[0] = ntohl(mps_rplc.rplc31_0);
tcam->rplc[1] = ntohl(mps_rplc.rplc63_32);
@@ -2075,7 +2100,7 @@ int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init,
tcam = (struct cudbg_mps_tcam *)temp_buff.data;
for (i = 0; i < n; i++) {
- rc = cudbg_collect_tcam_index(padap, tcam, i);
+ rc = cudbg_collect_tcam_index(pdbg_init, tcam, i);
if (rc) {
cudbg_err->sys_err = rc;
cudbg_put_buff(pdbg_init, &temp_buff);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
index 4c3854c..25cc06d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
@@ -40,8 +40,8 @@ int cudbg_compress_buff(struct cudbg_init *pdbg_init,
struct cudbg_buffer *pin_buff,
struct cudbg_buffer *pout_buff)
{
- struct z_stream_s compress_stream = { 0 };
struct cudbg_buffer temp_buff = { 0 };
+ struct z_stream_s compress_stream;
struct cudbg_compress_hdr *c_hdr;
int rc;
@@ -53,6 +53,7 @@ int cudbg_compress_buff(struct cudbg_init *pdbg_init,
c_hdr = (struct cudbg_compress_hdr *)temp_buff.data;
c_hdr->compress_id = CUDBG_ZLIB_COMPRESS_ID;
+ memset(&compress_stream, 0, sizeof(struct z_stream_s));
compress_stream.workspace = pdbg_init->workspace;
rc = zlib_deflateInit2(&compress_stream, Z_DEFAULT_COMPRESSION,
Z_DEFLATED, CUDBG_ZLIB_WIN_BITS,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
index 9d55c4c..60d2380 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
@@ -33,24 +33,11 @@ struct cudbg_compress_hdr {
static inline int cudbg_get_workspace_size(void)
{
-#ifdef CONFIG_ZLIB_DEFLATE
return zlib_deflate_workspacesize(CUDBG_ZLIB_WIN_BITS,
CUDBG_ZLIB_MEM_LVL);
-#else
- return 0;
-#endif /* CONFIG_ZLIB_DEFLATE */
}
-#ifndef CONFIG_ZLIB_DEFLATE
-static inline int cudbg_compress_buff(struct cudbg_init *pdbg_init,
- struct cudbg_buffer *pin_buff,
- struct cudbg_buffer *pout_buff)
-{
- return 0;
-}
-#else
int cudbg_compress_buff(struct cudbg_init *pdbg_init,
struct cudbg_buffer *pin_buff,
struct cudbg_buffer *pout_buff);
-#endif /* CONFIG_ZLIB_DEFLATE */
#endif /* __CUDBG_ZLIB_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 3e02187..4294673 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -820,6 +820,7 @@ struct vf_info {
unsigned char vf_mac_addr[ETH_ALEN];
unsigned int tx_rate;
bool pf_set_mac;
+ u16 vlan;
};
struct mbox_list {
@@ -1738,4 +1739,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
void free_tx_desc(struct adapter *adap, struct sge_txq *q,
unsigned int n, bool unmap);
void free_txq(struct adapter *adap, struct sge_txq *q);
+int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
+ u16 vlan);
#endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 9e0a8a8..30485f9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -406,14 +406,15 @@ static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init)
int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
u32 flag)
{
- struct cudbg_init cudbg_init = { 0 };
struct cudbg_buffer dbg_buff = { 0 };
u32 size, min_size, total_size = 0;
+ struct cudbg_init cudbg_init;
struct cudbg_hdr *cudbg_hdr;
int rc;
size = *buf_size;
+ memset(&cudbg_init, 0, sizeof(struct cudbg_init));
cudbg_init.adap = adap;
cudbg_init.outbuf = buf;
cudbg_init.outbuf_size = size;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 4716387..1e3cd8a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2783,7 +2783,30 @@ static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf,
return 0;
}
-#endif
+static int cxgb4_mgmt_set_vf_vlan(struct net_device *dev, int vf,
+ u16 vlan, u8 qos, __be16 vlan_proto)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ int ret;
+
+ if (vf >= adap->num_vfs || vlan > 4095 || qos > 7)
+ return -EINVAL;
+
+ if (vlan_proto != htons(ETH_P_8021Q) || qos != 0)
+ return -EPROTONOSUPPORT;
+
+ ret = t4_set_vlan_acl(adap, adap->mbox, vf + 1, vlan);
+ if (!ret) {
+ adap->vfinfo[vf].vlan = vlan;
+ return 0;
+ }
+
+ dev_err(adap->pdev_dev, "Err %d %s VLAN ACL for PF/VF %d/%d\n",
+ ret, (vlan ? "setting" : "clearing"), adap->pf, vf);
+ return ret;
+}
+#endif /* CONFIG_PCI_IOV */
static int cxgb_set_mac_addr(struct net_device *dev, void *p)
{
@@ -2905,9 +2928,6 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
static int cxgb_setup_tc_flower(struct net_device *dev,
struct tc_cls_flower_offload *cls_flower)
{
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return cxgb4_tc_flower_replace(dev, cls_flower);
@@ -2923,9 +2943,6 @@ static int cxgb_setup_tc_flower(struct net_device *dev,
static int cxgb_setup_tc_cls_u32(struct net_device *dev,
struct tc_cls_u32_offload *cls_u32)
{
- if (cls_u32->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_u32->command) {
case TC_CLSU32_NEW_KNODE:
case TC_CLSU32_REPLACE_KNODE:
@@ -2951,7 +2968,7 @@ static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
return -EINVAL;
}
- if (!tc_can_offload(dev))
+ if (!tc_cls_can_offload_and_chain0(dev, type_data))
return -EOPNOTSUPP;
switch (type) {
@@ -3207,6 +3224,7 @@ static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
.ndo_get_vf_config = cxgb4_mgmt_get_vf_config,
.ndo_set_vf_rate = cxgb4_mgmt_set_vf_rate,
.ndo_get_phys_port_id = cxgb4_mgmt_get_phys_port_id,
+ .ndo_set_vf_vlan = cxgb4_mgmt_set_vf_vlan,
};
#endif
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 9b9f3f9..3656336 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -43,7 +43,7 @@
#define STATS_CHECK_PERIOD (HZ / 2)
-struct ch_tc_pedit_fields pedits[] = {
+static struct ch_tc_pedit_fields pedits[] = {
PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0),
PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4),
PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0),
@@ -111,6 +111,9 @@ static void cxgb4_process_flow_match(struct net_device *dev,
ethtype_mask = 0;
}
+ if (ethtype_key == ETH_P_IPV6)
+ fs->type = 1;
+
fs->val.ethtype = ethtype_key;
fs->mask.ethtype = ethtype_mask;
fs->val.proto = key->ip_proto;
@@ -205,8 +208,8 @@ static void cxgb4_process_flow_match(struct net_device *dev,
VLAN_PRIO_SHIFT);
vlan_tci_mask = mask->vlan_id | (mask->vlan_priority <<
VLAN_PRIO_SHIFT);
- fs->val.ivlan = cpu_to_be16(vlan_tci);
- fs->mask.ivlan = cpu_to_be16(vlan_tci_mask);
+ fs->val.ivlan = vlan_tci;
+ fs->mask.ivlan = vlan_tci_mask;
/* Chelsio adapters use ivlan_vld bit to match vlan packets
* as 802.1Q. Also, when vlan tag is present in packets,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 6d76851..047609e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -195,9 +195,11 @@ static void t4_report_fw_error(struct adapter *adap)
u32 pcie_fw;
pcie_fw = t4_read_reg(adap, PCIE_FW_A);
- if (pcie_fw & PCIE_FW_ERR_F)
+ if (pcie_fw & PCIE_FW_ERR_F) {
dev_err(adap->pdev_dev, "Firmware reports adapter error: %s\n",
reason[PCIE_FW_EVAL_G(pcie_fw)]);
+ adap->flags &= ~FW_OK;
+ }
}
/*
@@ -317,9 +319,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
* wait [for a while] till we're at the front [or bail out with an
* EBUSY] ...
*/
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_add_tail(&entry.list, &adap->mlist.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
delay_idx = 0;
ms = delay[0];
@@ -332,9 +334,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
*/
pcie_fw = t4_read_reg(adap, PCIE_FW_A);
if (i > FW_CMD_MAX_TIMEOUT || (pcie_fw & PCIE_FW_ERR_F)) {
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
ret = (pcie_fw & PCIE_FW_ERR_F) ? -ENXIO : -EBUSY;
t4_record_mbox(adap, cmd, size, access, ret);
return ret;
@@ -365,9 +367,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++)
v = MBOWNER_G(t4_read_reg(adap, ctl_reg));
if (v != MBOX_OWNER_DRV) {
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT;
t4_record_mbox(adap, cmd, size, access, ret);
return ret;
@@ -418,9 +420,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
execute = i + ms;
t4_record_mbox(adap, cmd_rpl,
MBOX_LEN, access, execute);
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
return -FW_CMD_RETVAL_G((int)res);
}
}
@@ -430,9 +432,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n",
*(const u8 *)cmd, mbox);
t4_report_fw_error(adap);
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
t4_fatal_err(adap);
return ret;
}
@@ -5088,7 +5090,7 @@ int t4_read_rss(struct adapter *adapter, u16 *map)
static unsigned int t4_use_ldst(struct adapter *adap)
{
- return (adap->flags & FW_OK) || !adap->use_bd;
+ return (adap->flags & FW_OK) && !adap->use_bd;
}
/**
@@ -9899,3 +9901,35 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port,
return ret;
}
+
+/**
+ * t4_set_vlan_acl - Set a VLAN id for the specified VF
+ * @adapter: the adapter
+ * @mbox: mailbox to use for the FW command
+ * @vf: one of the VFs instantiated by the specified PF
+ * @vlan: The vlanid to be set
+ */
+int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
+ u16 vlan)
+{
+ struct fw_acl_vlan_cmd vlan_cmd;
+ unsigned int enable;
+
+ enable = (vlan ? FW_ACL_VLAN_CMD_EN_F : 0);
+ memset(&vlan_cmd, 0, sizeof(vlan_cmd));
+ vlan_cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_VLAN_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_WRITE_F |
+ FW_CMD_EXEC_F |
+ FW_ACL_VLAN_CMD_PFN_V(adap->pf) |
+ FW_ACL_VLAN_CMD_VFN_V(vf));
+ vlan_cmd.en_to_len16 = cpu_to_be32(enable | FW_LEN16(vlan_cmd));
+ /* Drop all packets that donot match vlan id */
+ vlan_cmd.dropnovlan_fm = FW_ACL_VLAN_CMD_FM_F;
+ if (enable != 0) {
+ vlan_cmd.nvlan = 1;
+ vlan_cmd.vlanid[0] = cpu_to_be16(vlan);
+ }
+
+ return t4_wr_mbox(adap, adap->mbox, &vlan_cmd, sizeof(vlan_cmd), NULL);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index f3310d5..f88766d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -2353,14 +2353,22 @@ struct fw_acl_vlan_cmd {
#define FW_ACL_VLAN_CMD_VFN_S 0
#define FW_ACL_VLAN_CMD_VFN_V(x) ((x) << FW_ACL_VLAN_CMD_VFN_S)
-#define FW_ACL_VLAN_CMD_EN_S 31
-#define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S)
+#define FW_ACL_VLAN_CMD_EN_S 31
+#define FW_ACL_VLAN_CMD_EN_M 0x1
+#define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S)
+#define FW_ACL_VLAN_CMD_EN_G(x) \
+ (((x) >> S_FW_ACL_VLAN_CMD_EN_S) & FW_ACL_VLAN_CMD_EN_M)
+#define FW_ACL_VLAN_CMD_EN_F FW_ACL_VLAN_CMD_EN_V(1U)
#define FW_ACL_VLAN_CMD_DROPNOVLAN_S 7
#define FW_ACL_VLAN_CMD_DROPNOVLAN_V(x) ((x) << FW_ACL_VLAN_CMD_DROPNOVLAN_S)
-#define FW_ACL_VLAN_CMD_FM_S 6
-#define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S)
+#define FW_ACL_VLAN_CMD_FM_S 6
+#define FW_ACL_VLAN_CMD_FM_M 0x1
+#define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S)
+#define FW_ACL_VLAN_CMD_FM_G(x) \
+ (((x) >> FW_ACL_VLAN_CMD_FM_S) & FW_ACL_VLAN_CMD_FM_M)
+#define FW_ACL_VLAN_CMD_FM_F FW_ACL_VLAN_CMD_FM_V(1U)
/* old 16-bit port capabilities bitmap (fw_port_cap16_t) */
enum fw_port_cap {
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 08c6ddb..5883f09 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -92,6 +92,7 @@ struct sge_rspq;
*/
struct port_info {
struct adapter *adapter; /* our adapter */
+ u32 vlan_id; /* vlan id for VST */
u16 viid; /* virtual interface ID */
s16 xact_addr_filt; /* index of our MAC address filter */
u16 rss_size; /* size of VI's RSS table slice */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 96f69f8..b7e79e6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -791,6 +791,8 @@ static int cxgb4vf_open(struct net_device *dev)
if (err)
goto err_unwind;
+ pi->vlan_id = t4vf_get_vf_vlan_acl(adapter);
+
netif_tx_start_all_queues(dev);
set_bit(pi->port_id, &adapter->open_device_map);
return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 129b914..dfce5df 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -1202,6 +1202,10 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(qidx >= pi->nqsets);
txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
+ if (pi->vlan_id && !skb_vlan_tag_present(skb))
+ __vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q),
+ pi->vlan_id);
+
/*
* Take this opportunity to reclaim any TX Descriptors whose DMA
* transfers have completed.
@@ -1570,6 +1574,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
{
struct adapter *adapter = rxq->rspq.adapter;
struct sge *s = &adapter->sge;
+ struct port_info *pi;
int ret;
struct sk_buff *skb;
@@ -1586,8 +1591,9 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
skb->truesize += skb->data_len;
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb_record_rx_queue(skb, rxq->rspq.idx);
+ pi = netdev_priv(skb->dev);
- if (pkt->vlan_ex) {
+ if (pkt->vlan_ex && !pi->vlan_id) {
__vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q),
be16_to_cpu(pkt->vlan));
rxq->stats.vlan_ex++;
@@ -1620,6 +1626,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq);
struct adapter *adapter = rspq->adapter;
struct sge *s = &adapter->sge;
+ struct port_info *pi;
/*
* If this is a good TCP packet and we have Generic Receive Offload
@@ -1644,6 +1651,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
__skb_pull(skb, s->pktshift);
skb->protocol = eth_type_trans(skb, rspq->netdev);
skb_record_rx_queue(skb, rspq->idx);
+ pi = netdev_priv(skb->dev);
rxq->stats.pkts++;
if (csum_ok && !pkt->err_vec &&
@@ -1660,9 +1668,10 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
} else
skb_checksum_none_assert(skb);
- if (pkt->vlan_ex) {
+ if (pkt->vlan_ex && !pi->vlan_id) {
rxq->stats.vlan_ex++;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(pkt->vlan));
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ be16_to_cpu(pkt->vlan));
}
netif_receive_skb(skb);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 9cf9c56..712e8f0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -413,5 +413,6 @@ int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
int t4vf_prep_adapter(struct adapter *);
int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
unsigned int *naddr, u8 *addr);
+int t4vf_get_vf_vlan_acl(struct adapter *adapter);
#endif /* __T4VF_COMMON_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 67aec59..798695b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -2147,3 +2147,31 @@ int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
return ret;
}
+
+/**
+ * t4vf_get_vf_vlan_acl - Get the VLAN ID to be set to
+ * the VI of this VF.
+ * @adapter: The adapter
+ *
+ * Find the VLAN ID to be set to the VF's VI. The requested VLAN ID
+ * is from the host OS via callback in the PF driver.
+ */
+int t4vf_get_vf_vlan_acl(struct adapter *adapter)
+{
+ struct fw_acl_vlan_cmd cmd;
+ int vlan = 0;
+ int ret = 0;
+
+ cmd.op_to_vfn = htonl(FW_CMD_OP_V(FW_ACL_VLAN_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_READ_F);
+
+ /* Note: Do not enable the ACL */
+ cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+
+ ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd);
+
+ if (!ret)
+ vlan = be16_to_cpu(cmd.vlanid[0]);
+
+ return vlan;
+}
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index c6e859a..c36c819 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4634,6 +4634,14 @@ int be_update_queues(struct be_adapter *adapter)
be_schedule_worker(adapter);
+ /* The IF was destroyed and re-created. We need to clear
+ * all promiscuous flags valid for the destroyed IF.
+ * Without this promisc mode is not restored during
+ * be_open() because the driver thinks that it is
+ * already enabled in HW.
+ */
+ adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
+
if (netif_running(netdev))
status = be_open(netdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 7410205..b034c7f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1109,6 +1109,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
.set_rxfh = hns3_set_rss,
.get_link_ksettings = hns3_get_link_ksettings,
.get_channels = hns3_get_channels,
+ .get_coalesce = hns3_get_coalesce,
+ .set_coalesce = hns3_set_coalesce,
};
static const struct ethtool_ops hns3_ethtool_ops = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 96f453f..f38fc5c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -116,6 +116,9 @@ static int hclge_get_ring_chain_from_mbx(
hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
ring_chain->tqp_index =
hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
+ hnae_set_field(ring_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
+ HCLGE_INT_GL_IDX_S,
+ req->msg[5]);
cur_chain = ring_chain;
@@ -133,6 +136,11 @@ static int hclge_get_ring_chain_from_mbx(
[req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 1]]);
+ hnae_set_field(new_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
+ HCLGE_INT_GL_IDX_S,
+ req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 2]);
+
cur_chain->next = new_chain;
cur_chain = new_chain;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 3d2bc9a..0d89965 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -565,6 +565,11 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 1] =
node->tqp_index;
+ req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 2] =
+ hnae_get_field(node->int_gl_idx,
+ HNAE3_RING_GL_IDX_M,
+ HNAE3_RING_GL_IDX_S);
+
if (i == (HCLGE_MBX_VF_MSG_DATA_NUM -
HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM) /
HCLGEVF_RING_NODE_VARIABLE_NUM) {
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 71ddad1..354c098 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -494,6 +494,9 @@ static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_s
case 16384:
ret |= EMAC_MR1_RFS_16K;
break;
+ case 8192:
+ ret |= EMAC4_MR1_RFS_8K;
+ break;
case 4096:
ret |= EMAC_MR1_RFS_4K;
break;
@@ -516,6 +519,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_
case 16384:
ret |= EMAC4_MR1_TFS_16K;
break;
+ case 8192:
+ ret |= EMAC4_MR1_TFS_8K;
+ break;
case 4096:
ret |= EMAC4_MR1_TFS_4K;
break;
diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
index bc14dcf..e2f80cc 100644
--- a/drivers/net/ethernet/ibm/emac/emac.h
+++ b/drivers/net/ethernet/ibm/emac/emac.h
@@ -138,9 +138,11 @@ struct emac_regs {
#define EMAC4_MR1_RFS_2K 0x00100000
#define EMAC4_MR1_RFS_4K 0x00180000
+#define EMAC4_MR1_RFS_8K 0x00200000
#define EMAC4_MR1_RFS_16K 0x00280000
#define EMAC4_MR1_TFS_2K 0x00020000
#define EMAC4_MR1_TFS_4K 0x00030000
+#define EMAC4_MR1_TFS_8K 0x00040000
#define EMAC4_MR1_TFS_16K 0x00050000
#define EMAC4_MR1_TR 0x00008000
#define EMAC4_MR1_MWSW_001 0x00001000
@@ -229,7 +231,7 @@ struct emac_regs {
#define EMAC_STACR_PHYE 0x00004000
#define EMAC_STACR_STAC_MASK 0x00003000
#define EMAC_STACR_STAC_READ 0x00001000
-#define EMAC_STACR_STAC_WRITE 0x00002000
+#define EMAC_STACR_STAC_WRITE 0x00000800
#define EMAC_STACR_OPBC_MASK 0x00000C00
#define EMAC_STACR_OPBC_50 0x00000000
#define EMAC_STACR_OPBC_66 0x00000400
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index be2ce8d..8f2a77e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -411,6 +411,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
struct ibmvnic_rx_pool *rx_pool;
int rx_scrqs;
int i, j, rc;
+ u64 *size_array;
+
+ size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
for (i = 0; i < rx_scrqs; i++) {
@@ -418,7 +422,17 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i);
- rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff);
+ if (rx_pool->buff_size != be64_to_cpu(size_array[i])) {
+ free_long_term_buff(adapter, &rx_pool->long_term_buff);
+ rx_pool->buff_size = be64_to_cpu(size_array[i]);
+ alloc_long_term_buff(adapter, &rx_pool->long_term_buff,
+ rx_pool->size *
+ rx_pool->buff_size);
+ } else {
+ rc = reset_long_term_buff(adapter,
+ &rx_pool->long_term_buff);
+ }
+
if (rc)
return rc;
@@ -440,14 +454,12 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
static void release_rx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rx_pool *rx_pool;
- int rx_scrqs;
int i, j;
if (!adapter->rx_pool)
return;
- rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
- for (i = 0; i < rx_scrqs; i++) {
+ for (i = 0; i < adapter->num_active_rx_pools; i++) {
rx_pool = &adapter->rx_pool[i];
netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
@@ -470,6 +482,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
kfree(adapter->rx_pool);
adapter->rx_pool = NULL;
+ adapter->num_active_rx_pools = 0;
}
static int init_rx_pools(struct net_device *netdev)
@@ -494,6 +507,8 @@ static int init_rx_pools(struct net_device *netdev)
return -1;
}
+ adapter->num_active_rx_pools = 0;
+
for (i = 0; i < rxadd_subcrqs; i++) {
rx_pool = &adapter->rx_pool[i];
@@ -537,6 +552,8 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool->next_free = 0;
}
+ adapter->num_active_rx_pools = rxadd_subcrqs;
+
return 0;
}
@@ -587,13 +604,12 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
static void release_tx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_tx_pool *tx_pool;
- int i, tx_scrqs;
+ int i;
if (!adapter->tx_pool)
return;
- tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
- for (i = 0; i < tx_scrqs; i++) {
+ for (i = 0; i < adapter->num_active_tx_pools; i++) {
netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
tx_pool = &adapter->tx_pool[i];
kfree(tx_pool->tx_buff);
@@ -604,6 +620,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
kfree(adapter->tx_pool);
adapter->tx_pool = NULL;
+ adapter->num_active_tx_pools = 0;
}
static int init_tx_pools(struct net_device *netdev)
@@ -620,6 +637,8 @@ static int init_tx_pools(struct net_device *netdev)
if (!adapter->tx_pool)
return -1;
+ adapter->num_active_tx_pools = 0;
+
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
@@ -667,6 +686,8 @@ static int init_tx_pools(struct net_device *netdev)
tx_pool->producer_index = 0;
}
+ adapter->num_active_tx_pools = tx_subcrqs;
+
return 0;
}
@@ -861,7 +882,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
if (adapter->vpd->buff)
len = adapter->vpd->len;
- reinit_completion(&adapter->fw_done);
+ init_completion(&adapter->fw_done);
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
ibmvnic_send_crq(adapter, &crq);
@@ -923,6 +944,13 @@ static int init_resources(struct ibmvnic_adapter *adapter)
if (!adapter->vpd)
return -ENOMEM;
+ /* Vital Product Data (VPD) */
+ rc = ibmvnic_get_vpd(adapter);
+ if (rc) {
+ netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
+ return rc;
+ }
+
adapter->map_id = 1;
adapter->napi = kcalloc(adapter->req_rx_queues,
sizeof(struct napi_struct), GFP_KERNEL);
@@ -996,7 +1024,7 @@ static int __ibmvnic_open(struct net_device *netdev)
static int ibmvnic_open(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int rc, vpd;
+ int rc;
mutex_lock(&adapter->reset_lock);
@@ -1019,11 +1047,6 @@ static int ibmvnic_open(struct net_device *netdev)
rc = __ibmvnic_open(netdev);
netif_carrier_on(netdev);
- /* Vital Product Data (VPD) */
- vpd = ibmvnic_get_vpd(adapter);
- if (vpd)
- netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
-
mutex_unlock(&adapter->reset_lock);
return rc;
@@ -1553,6 +1576,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
static int do_reset(struct ibmvnic_adapter *adapter,
struct ibmvnic_rwi *rwi, u32 reset_state)
{
+ u64 old_num_rx_queues, old_num_tx_queues;
struct net_device *netdev = adapter->netdev;
int i, rc;
@@ -1562,6 +1586,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
+ old_num_rx_queues = adapter->req_rx_queues;
+ old_num_tx_queues = adapter->req_tx_queues;
+
if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
rc = ibmvnic_reenable_crq_queue(adapter);
if (rc)
@@ -1606,6 +1633,12 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = init_resources(adapter);
if (rc)
return rc;
+ } else if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues) {
+ release_rx_pools(adapter);
+ release_tx_pools(adapter);
+ init_rx_pools(netdev);
+ init_tx_pools(netdev);
} else {
rc = reset_tx_pools(adapter);
if (rc)
@@ -3603,7 +3636,17 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
*req_value,
(long int)be64_to_cpu(crq->request_capability_rsp.
number), name);
- *req_value = be64_to_cpu(crq->request_capability_rsp.number);
+
+ if (be16_to_cpu(crq->request_capability_rsp.capability) ==
+ REQ_MTU) {
+ pr_err("mtu of %llu is not supported. Reverting.\n",
+ *req_value);
+ *req_value = adapter->fallback.mtu;
+ } else {
+ *req_value =
+ be64_to_cpu(crq->request_capability_rsp.number);
+ }
+
ibmvnic_send_req_caps(adapter, 1);
return;
default:
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 2df79fd..fe21a6e 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1091,6 +1091,8 @@ struct ibmvnic_adapter {
u64 opt_rxba_entries_per_subcrq;
__be64 tx_rx_desc_req;
u8 map_id;
+ u64 num_active_rx_pools;
+ u64 num_active_tx_pools;
struct tasklet_struct tasklet;
enum vnic_state state;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 9f18d39..1298b69 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3303,9 +3303,11 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
if (adapter->flags & FLAG_IS_ICH) {
u32 rxdctl = er32(RXDCTL(0));
- ew32(RXDCTL(0), rxdctl | 0x3);
+ ew32(RXDCTL(0), rxdctl | 0x3 | BIT(8));
}
+ dev_info(&adapter->pdev->dev,
+ "Some CPU C-states have been disabled in order to enable jumbo frames\n");
pm_qos_update_request(&adapter->pm_qos_req, lat);
} else {
pm_qos_update_request(&adapter->pm_qos_req,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index ea3ab24..760cfa5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -353,7 +353,7 @@ int fm10k_iov_resume(struct pci_dev *pdev)
struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
/* allocate all but the last GLORT to the VFs */
- if (i == ((~hw->mac.dglort_map) >> FM10K_DGLORTMAP_MASK_SHIFT))
+ if (i == (~hw->mac.dglort_map >> FM10K_DGLORTMAP_MASK_SHIFT))
break;
/* assign GLORT to VF, and restrict it to multicast */
@@ -511,7 +511,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
return err;
/* allocate VFs if not already allocated */
- if (num_vfs && (num_vfs != current_vfs)) {
+ if (num_vfs && num_vfs != current_vfs) {
/* Disable completer abort error reporting as
* the VFs can trigger this any time they read a queue
* that they don't own.
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index adc62fb..a38ae5c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -934,8 +934,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
if (vid >= VLAN_N_VID)
return -EINVAL;
- /* Verify we have permission to add VLANs */
- if (hw->mac.vlan_override)
+ /* Verify that we have permission to add VLANs. If this is a request
+ * to remove a VLAN, we still want to allow the user to remove the
+ * VLAN device. In that case, we need to clear the bit in the
+ * active_vlans bitmask.
+ */
+ if (set && hw->mac.vlan_override)
return -EACCES;
/* update active_vlans bitmask */
@@ -954,6 +958,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
rx_ring->vid &= ~FM10K_VLAN_CLEAR;
}
+ /* If our VLAN has been overridden, there is no reason to send VLAN
+ * removal requests as they will be silently ignored.
+ */
+ if (hw->mac.vlan_override)
+ return 0;
+
/* Do not remove default VLAN ID related entries from VLAN and MAC
* tables
*/
@@ -1040,14 +1050,13 @@ static int __fm10k_uc_sync(struct net_device *dev,
const unsigned char *addr, bool sync)
{
struct fm10k_intfc *interface = netdev_priv(dev);
- struct fm10k_hw *hw = &interface->hw;
u16 vid, glort = interface->glort;
s32 err;
if (!is_valid_ether_addr(addr))
return -EADDRNOTAVAIL;
- for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
+ for (vid = fm10k_find_next_vlan(interface, 0);
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
err = fm10k_queue_mac_request(interface, glort,
@@ -1106,14 +1115,13 @@ static int __fm10k_mc_sync(struct net_device *dev,
const unsigned char *addr, bool sync)
{
struct fm10k_intfc *interface = netdev_priv(dev);
- struct fm10k_hw *hw = &interface->hw;
u16 vid, glort = interface->glort;
s32 err;
if (!is_multicast_ether_addr(addr))
return -EADDRNOTAVAIL;
- for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
+ for (vid = fm10k_find_next_vlan(interface, 0);
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
err = fm10k_queue_mac_request(interface, glort,
@@ -1157,10 +1165,12 @@ static void fm10k_set_rx_mode(struct net_device *dev)
/* update xcast mode first, but only if it changed */
if (interface->xcast_mode != xcast_mode) {
- /* update VLAN table */
+ /* update VLAN table when entering promiscuous mode */
if (xcast_mode == FM10K_XCAST_MODE_PROMISC)
fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL,
0, true);
+
+ /* clear VLAN table when exiting promiscuous mode */
if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC)
fm10k_clear_unused_vlans(interface);
@@ -1182,9 +1192,10 @@ static void fm10k_set_rx_mode(struct net_device *dev)
void fm10k_restore_rx_state(struct fm10k_intfc *interface)
{
+ struct fm10k_l2_accel *l2_accel = interface->l2_accel;
struct net_device *netdev = interface->netdev;
struct fm10k_hw *hw = &interface->hw;
- int xcast_mode;
+ int xcast_mode, i;
u16 vid, glort;
/* record glort for this interface */
@@ -1211,11 +1222,8 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0,
xcast_mode == FM10K_XCAST_MODE_PROMISC);
- /* Add filter for VLAN 0 */
- fm10k_queue_vlan_request(interface, 0, 0, true);
-
/* update table with current entries */
- for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
+ for (vid = fm10k_find_next_vlan(interface, 0);
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
fm10k_queue_vlan_request(interface, vid, 0, true);
@@ -1234,6 +1242,24 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
__dev_uc_sync(netdev, fm10k_uc_sync, fm10k_uc_unsync);
__dev_mc_sync(netdev, fm10k_mc_sync, fm10k_mc_unsync);
+ /* synchronize macvlan addresses */
+ if (l2_accel) {
+ for (i = 0; i < l2_accel->size; i++) {
+ struct net_device *sdev = l2_accel->macvlan[i];
+
+ if (!sdev)
+ continue;
+
+ glort = l2_accel->dglort + 1 + i;
+
+ hw->mac.ops.update_xcast_mode(hw, glort,
+ FM10K_XCAST_MODE_MULTI);
+ fm10k_queue_mac_request(interface, glort,
+ sdev->dev_addr,
+ hw->mac.default_vid, true);
+ }
+ }
+
fm10k_mbx_unlock(interface);
/* record updated xcast mode state */
@@ -1490,7 +1516,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
hw->mac.ops.update_xcast_mode(hw, glort,
FM10K_XCAST_MODE_MULTI);
fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
- 0, true);
+ hw->mac.default_vid, true);
}
fm10k_mbx_unlock(interface);
@@ -1530,7 +1556,7 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
hw->mac.ops.update_xcast_mode(hw, glort,
FM10K_XCAST_MODE_NONE);
fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
- 0, false);
+ hw->mac.default_vid, false);
}
fm10k_mbx_unlock(interface);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 425d814..d6406fc 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -866,7 +866,7 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw,
/* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is
* used here to indicate to the VF that it will not have privilege to
* write VLAN_TABLE. All policy is enforced on the PF but this allows
- * the VF to correctly report errors to userspace rqeuests.
+ * the VF to correctly report errors to userspace requests.
*/
if (vf_info->pf_vid)
vf_vid = vf_info->pf_vid | FM10K_VLAN_OVERRIDE;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 9af7425..d9670cd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -1027,7 +1027,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
hw->aq.arq.next_to_clean = ntc;
hw->aq.arq.next_to_use = ntu;
- i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode));
+ i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode), &e->desc);
clean_arq_element_out:
/* Set pending if needed, unlock and return */
if (pending)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index c577634..0d471b0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -2231,8 +2231,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
*/
struct i40e_aqc_nvm_update {
u8 command_flags;
-#define I40E_AQ_NVM_LAST_CMD 0x01
-#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_LAST_CMD 0x01
+#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT 1
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL 0x01
u8 module_pointer;
__le16 length;
__le32 offset;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 1b1e2ac..0de9610 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -378,11 +378,11 @@ void i40e_client_subtask(struct i40e_pf *pf)
if (!client || !cdev)
return;
- /* Here we handle client opens. If the client is down, but
- * the netdev is up, then open the client.
+ /* Here we handle client opens. If the client is down, and
+ * the netdev is registered, then open the client.
*/
if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
- if (!test_bit(__I40E_VSI_DOWN, vsi->state) &&
+ if (vsi->netdev_registered &&
client->ops && client->ops->open) {
set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
ret = client->ops->open(&cdev->lan_info, client);
@@ -393,17 +393,19 @@ void i40e_client_subtask(struct i40e_pf *pf)
i40e_client_del_instance(pf);
}
}
- } else {
- /* Likewise for client close. If the client is up, but the netdev
- * is down, then close the client.
- */
- if (test_bit(__I40E_VSI_DOWN, vsi->state) &&
- client->ops && client->ops->close) {
- clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
- client->ops->close(&cdev->lan_info, client, false);
- i40e_client_release_qvlist(&cdev->lan_info);
- }
}
+
+ /* enable/disable PE TCP_ENA flag based on netdev down/up
+ */
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ i40e_client_update_vsi_ctxt(&cdev->lan_info, client,
+ 0, 0, 0,
+ I40E_CLIENT_VSI_FLAG_TCP_ENABLE);
+ else
+ i40e_client_update_vsi_ctxt(&cdev->lan_info, client,
+ 0, 0,
+ I40E_CLIENT_VSI_FLAG_TCP_ENABLE,
+ I40E_CLIENT_VSI_FLAG_TCP_ENABLE);
}
/**
@@ -717,13 +719,13 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
return -ENOENT;
}
- if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
- (flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+ if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) &&
+ (flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) {
ctxt.info.valid_sections =
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
- } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
- !(flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+ } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) &&
+ !(flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) {
ctxt.info.valid_sections =
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index 15b21a5..ba55c88 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -132,7 +132,7 @@ struct i40e_info {
#define I40E_CLIENT_RESET_LEVEL_PF 1
#define I40E_CLIENT_RESET_LEVEL_CORE 2
-#define I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE BIT(1)
+#define I40E_CLIENT_VSI_FLAG_TCP_ENABLE BIT(1)
struct i40e_ops {
/* setup_q_vector_list enables queues with a particular vector */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 40c5f76..ee6052e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1486,6 +1486,7 @@ u32 i40e_led_get(struct i40e_hw *hw)
case I40E_COMBINED_ACTIVITY:
case I40E_FILTER_ACTIVITY:
case I40E_MAC_ACTIVITY:
+ case I40E_LINK_ACTIVITY:
continue;
default:
break;
@@ -1534,6 +1535,7 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
case I40E_COMBINED_ACTIVITY:
case I40E_FILTER_ACTIVITY:
case I40E_MAC_ACTIVITY:
+ case I40E_LINK_ACTIVITY:
continue;
default:
break;
@@ -1544,9 +1546,6 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
I40E_GLGEN_GPIO_CTL_LED_MODE_MASK);
- if (mode == I40E_LINK_ACTIVITY)
- blink = false;
-
if (blink)
gpio_val |= BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT);
else
@@ -3465,13 +3464,14 @@ exit:
* @length: length of the section to be written (in bytes from the offset)
* @data: command buffer (size [bytes] = length)
* @last_command: tells if this is the last command in a series
+ * @preservation_flags: Preservation mode flags
* @cmd_details: pointer to command details structure or NULL
*
* Update the NVM using the admin queue commands
**/
i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
u32 offset, u16 length, void *data,
- bool last_command,
+ bool last_command, u8 preservation_flags,
struct i40e_asq_cmd_details *cmd_details)
{
struct i40e_aq_desc desc;
@@ -3490,6 +3490,16 @@ i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
/* If this is the last command in a series, set the proper flag. */
if (last_command)
cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+ if (hw->mac.type == I40E_MAC_X722) {
+ if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_SELECTED)
+ cmd->command_flags |=
+ (I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED <<
+ I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT);
+ else if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_ALL)
+ cmd->command_flags |=
+ (I40E_AQ_NVM_PRESERVATION_FLAGS_ALL <<
+ I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT);
+ }
cmd->module_pointer = module_pointer;
cmd->offset = cpu_to_le32(offset);
cmd->length = cpu_to_le16(length);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2ab22eba..2703a92 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4877,104 +4877,6 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
#endif
/**
- * i40e_detect_recover_hung_queue - Function to detect and recover hung_queue
- * @q_idx: TX queue number
- * @vsi: Pointer to VSI struct
- *
- * This function checks specified queue for given VSI. Detects hung condition.
- * We proactively detect hung TX queues by checking if interrupts are disabled
- * but there are pending descriptors. If it appears hung, attempt to recover
- * by triggering a SW interrupt.
- **/
-static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
-{
- struct i40e_ring *tx_ring = NULL;
- struct i40e_pf *pf;
- u32 val, tx_pending;
- int i;
-
- pf = vsi->back;
-
- /* now that we have an index, find the tx_ring struct */
- for (i = 0; i < vsi->num_queue_pairs; i++) {
- if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
- if (q_idx == vsi->tx_rings[i]->queue_index) {
- tx_ring = vsi->tx_rings[i];
- break;
- }
- }
- }
-
- if (!tx_ring)
- return;
-
- /* Read interrupt register */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
- val = rd32(&pf->hw,
- I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
- tx_ring->vsi->base_vector - 1));
- else
- val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
-
- tx_pending = i40e_get_tx_pending(tx_ring);
-
- /* Interrupts are disabled and TX pending is non-zero,
- * trigger the SW interrupt (don't wait). Worst case
- * there will be one extra interrupt which may result
- * into not cleaning any queues because queues are cleaned.
- */
- if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)))
- i40e_force_wb(vsi, tx_ring->q_vector);
-}
-
-/**
- * i40e_detect_recover_hung - Function to detect and recover hung_queues
- * @pf: pointer to PF struct
- *
- * LAN VSI has netdev and netdev has TX queues. This function is to check
- * each of those TX queues if they are hung, trigger recovery by issuing
- * SW interrupt.
- **/
-static void i40e_detect_recover_hung(struct i40e_pf *pf)
-{
- struct net_device *netdev;
- struct i40e_vsi *vsi;
- unsigned int i;
-
- /* Only for LAN VSI */
- vsi = pf->vsi[pf->lan_vsi];
-
- if (!vsi)
- return;
-
- /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */
- if (test_bit(__I40E_VSI_DOWN, vsi->back->state) ||
- test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state))
- return;
-
- /* Make sure type is MAIN VSI */
- if (vsi->type != I40E_VSI_MAIN)
- return;
-
- netdev = vsi->netdev;
- if (!netdev)
- return;
-
- /* Bail out if netif_carrier is not OK */
- if (!netif_carrier_ok(netdev))
- return;
-
- /* Go thru' TX queues for netdev */
- for (i = 0; i < netdev->num_tx_queues; i++) {
- struct netdev_queue *q;
-
- q = netdev_get_tx_queue(netdev, i);
- if (q)
- i40e_detect_recover_hung_queue(i, vsi);
- }
-}
-
-/**
* i40e_get_iscsi_tc_map - Return TC map for iSCSI APP
* @pf: pointer to PF
*
@@ -5342,6 +5244,8 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
{
u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
struct i40e_vsi_context ctxt;
int ret = 0;
int i;
@@ -5359,10 +5263,40 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
if (ret) {
- dev_info(&vsi->back->pdev->dev,
+ struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
+
+ dev_info(&pf->pdev->dev,
"Failed configuring TC map %d for VSI %d\n",
enabled_tc, vsi->seid);
- goto out;
+ ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid,
+ &bw_config, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Failed querying vsi bw info, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ goto out;
+ }
+ if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) {
+ u8 valid_tc = bw_config.tc_valid_bits & enabled_tc;
+
+ if (!valid_tc)
+ valid_tc = bw_config.tc_valid_bits;
+ /* Always enable TC0, no matter what */
+ valid_tc |= 1;
+ dev_info(&pf->pdev->dev,
+ "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n",
+ enabled_tc, bw_config.tc_valid_bits, valid_tc);
+ enabled_tc = valid_tc;
+ }
+
+ ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "Unable to configure TC map %d for VSI %d\n",
+ enabled_tc, vsi->seid);
+ goto out;
+ }
}
/* Update Queue Pairs Mapping for currently enabled UPs */
@@ -5402,13 +5336,12 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
/* Update the VSI after updating the VSI queue-mapping
* information
*/
- ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
if (ret) {
- dev_info(&vsi->back->pdev->dev,
+ dev_info(&pf->pdev->dev,
"Update vsi tc config failed, err %s aq_err %s\n",
- i40e_stat_str(&vsi->back->hw, ret),
- i40e_aq_str(&vsi->back->hw,
- vsi->back->hw.aq.asq_last_status));
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
goto out;
}
/* update the local VSI info with updated queue map */
@@ -5418,11 +5351,10 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
/* Update current VSI BW information */
ret = i40e_vsi_get_bw_info(vsi);
if (ret) {
- dev_info(&vsi->back->pdev->dev,
+ dev_info(&pf->pdev->dev,
"Failed updating vsi bw info, err %s aq_err %s\n",
- i40e_stat_str(&vsi->back->hw, ret),
- i40e_aq_str(&vsi->back->hw,
- vsi->back->hw.aq.asq_last_status));
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
goto out;
}
@@ -7505,9 +7437,6 @@ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
{
struct i40e_vsi *vsi = np->vsi;
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return i40e_configure_clsflower(vsi, cls_flower);
@@ -7525,6 +7454,9 @@ static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct i40e_netdev_priv *np = cb_priv;
+ if (!tc_cls_can_offload_and_chain0(np->vsi->netdev, type_data))
+ return -EOPNOTSUPP;
+
switch (type) {
case TC_SETUP_CLSFLOWER:
return i40e_setup_tc_cls_flower(np, type_data);
@@ -9075,6 +9007,17 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
vsi->uplink_seid);
return ret;
}
+ /* Reconfigure TX queues using QTX_CTL register */
+ ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "failed to configure TX rings for channel %u\n",
+ ch->seid);
+ return ret;
+ }
+ /* update 'next_base_queue' */
+ vsi->next_base_queue = vsi->next_base_queue +
+ ch->num_queue_pairs;
if (ch->max_tx_rate) {
u64 credits = ch->max_tx_rate;
@@ -9695,7 +9638,7 @@ static void i40e_service_task(struct work_struct *work)
if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
return;
- i40e_detect_recover_hung(pf);
+ i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]);
i40e_sync_filters_subtask(pf);
i40e_reset_subtask(pf);
i40e_handle_mdd_event(pf);
@@ -10462,10 +10405,9 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
/* set up vector assignment tracking */
size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors);
pf->irq_pile = kzalloc(size, GFP_KERNEL);
- if (!pf->irq_pile) {
- dev_err(&pf->pdev->dev, "error allocating irq_pile memory\n");
+ if (!pf->irq_pile)
return -ENOMEM;
- }
+
pf->irq_pile->num_entries = vectors;
pf->irq_pile->search_hint = 0;
@@ -10783,8 +10725,13 @@ static int i40e_pf_config_rss(struct i40e_pf *pf)
/* Determine the RSS size of the VSI */
if (!vsi->rss_size) {
u16 qcount;
-
- qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
+ /* If the firmware does something weird during VSI init, we
+ * could end up with zero TCs. Check for that to avoid
+ * divide-by-zero. It probably won't pass traffic, but it also
+ * won't panic.
+ */
+ qcount = vsi->num_queue_pairs /
+ (vsi->tc_config.numtc ? vsi->tc_config.numtc : 1);
vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
}
if (!vsi->rss_size)
@@ -10972,7 +10919,7 @@ i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
ret = i40e_aq_update_nvm(&pf->hw,
I40E_SR_NVM_CONTROL_WORD,
0x10, sizeof(nvm_word),
- &nvm_word, true, NULL);
+ &nvm_word, true, 0, NULL);
/* Save off last admin queue command status before releasing
* the NVM
*/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 425713f..76a5cb0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -239,8 +239,9 @@ read_nvm_exit:
*
* Writes a 16 bit words buffer to the Shadow RAM using the admin command.
**/
-static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
- u32 offset, u16 words, void *data,
+static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw,
+ u8 module_pointer, u32 offset,
+ u16 words, void *data,
bool last_command)
{
i40e_status ret_code = I40E_ERR_NVM;
@@ -496,7 +497,8 @@ static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
ret_code = i40e_aq_update_nvm(hw, module_pointer,
2 * offset, /*bytes*/
2 * words, /*bytes*/
- data, last_command, &cmd_details);
+ data, last_command, 0,
+ &cmd_details);
return ret_code;
}
@@ -677,6 +679,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
u8 *bytes, int *perrno);
+static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+ struct i40e_nvm_access *cmd,
+ u8 *bytes, int *perrno);
static inline u8 i40e_nvmupd_get_module(u32 val)
{
return (u8)(val & I40E_NVM_MOD_PNT_MASK);
@@ -686,6 +691,12 @@ static inline u8 i40e_nvmupd_get_transaction(u32 val)
return (u8)((val & I40E_NVM_TRANS_MASK) >> I40E_NVM_TRANS_SHIFT);
}
+static inline u8 i40e_nvmupd_get_preservation_flags(u32 val)
+{
+ return (u8)((val & I40E_NVM_PRESERVATION_FLAGS_MASK) >>
+ I40E_NVM_PRESERVATION_FLAGS_SHIFT);
+}
+
static const char * const i40e_nvm_update_state_str[] = {
"I40E_NVMUPD_INVALID",
"I40E_NVMUPD_READ_CON",
@@ -703,6 +714,7 @@ static const char * const i40e_nvm_update_state_str[] = {
"I40E_NVMUPD_STATUS",
"I40E_NVMUPD_EXEC_AQ",
"I40E_NVMUPD_GET_AQ_RESULT",
+ "I40E_NVMUPD_GET_AQ_EVENT",
};
/**
@@ -798,9 +810,9 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
* the wait info and return before doing anything else
*/
if (cmd->offset == 0xffff) {
- i40e_nvmupd_check_wait_event(hw, hw->nvm_wait_opcode);
+ i40e_nvmupd_clear_wait_state(hw);
status = 0;
- goto exit;
+ break;
}
status = I40E_ERR_NOT_READY;
@@ -815,7 +827,7 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
*perrno = -ESRCH;
break;
}
-exit:
+
mutex_unlock(&hw->aq.arq_mutex);
return status;
}
@@ -944,6 +956,10 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
status = i40e_nvmupd_get_aq_result(hw, cmd, bytes, perrno);
break;
+ case I40E_NVMUPD_GET_AQ_EVENT:
+ status = i40e_nvmupd_get_aq_event(hw, cmd, bytes, perrno);
+ break;
+
default:
i40e_debug(hw, I40E_DEBUG_NVM,
"NVMUPD: bad cmd %s in init state\n",
@@ -1118,38 +1134,53 @@ retry:
}
/**
- * i40e_nvmupd_check_wait_event - handle NVM update operation events
+ * i40e_nvmupd_clear_wait_state - clear wait state on hw
* @hw: pointer to the hardware structure
- * @opcode: the event that just happened
**/
-void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode)
+void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw)
{
- if (opcode == hw->nvm_wait_opcode) {
- i40e_debug(hw, I40E_DEBUG_NVM,
- "NVMUPD: clearing wait on opcode 0x%04x\n", opcode);
- if (hw->nvm_release_on_done) {
- i40e_release_nvm(hw);
- hw->nvm_release_on_done = false;
- }
- hw->nvm_wait_opcode = 0;
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVMUPD: clearing wait on opcode 0x%04x\n",
+ hw->nvm_wait_opcode);
- if (hw->aq.arq_last_status) {
- hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
- return;
- }
+ if (hw->nvm_release_on_done) {
+ i40e_release_nvm(hw);
+ hw->nvm_release_on_done = false;
+ }
+ hw->nvm_wait_opcode = 0;
- switch (hw->nvmupd_state) {
- case I40E_NVMUPD_STATE_INIT_WAIT:
- hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
- break;
+ if (hw->aq.arq_last_status) {
+ hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
+ return;
+ }
- case I40E_NVMUPD_STATE_WRITE_WAIT:
- hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
- break;
+ switch (hw->nvmupd_state) {
+ case I40E_NVMUPD_STATE_INIT_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+ break;
- default:
- break;
- }
+ case I40E_NVMUPD_STATE_WRITE_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
+ break;
+
+ default:
+ break;
+ }
+}
+
+/**
+ * i40e_nvmupd_check_wait_event - handle NVM update operation events
+ * @hw: pointer to the hardware structure
+ * @opcode: the event that just happened
+ **/
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
+ struct i40e_aq_desc *desc)
+{
+ u32 aq_desc_len = sizeof(struct i40e_aq_desc);
+
+ if (opcode == hw->nvm_wait_opcode) {
+ memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len);
+ i40e_nvmupd_clear_wait_state(hw);
}
}
@@ -1205,6 +1236,9 @@ static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw,
else if (module == 0)
upd_cmd = I40E_NVMUPD_GET_AQ_RESULT;
break;
+ case I40E_NVM_AQE:
+ upd_cmd = I40E_NVMUPD_GET_AQ_EVENT;
+ break;
}
break;
@@ -1267,6 +1301,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
u32 aq_data_len;
i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+ if (cmd->offset == 0xffff)
+ return 0;
+
memset(&cmd_details, 0, sizeof(cmd_details));
cmd_details.wb_desc = &hw->nvm_wb_desc;
@@ -1302,6 +1339,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
}
}
+ if (cmd->offset)
+ memset(&hw->nvm_aq_event_desc, 0, aq_desc_len);
+
/* and away we go! */
status = i40e_asq_send_command(hw, aq_desc, buff,
buff_size, &cmd_details);
@@ -1311,6 +1351,7 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
i40e_stat_str(hw, status),
i40e_aq_str(hw, hw->aq.asq_last_status));
*perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status);
+ return status;
}
/* should we wait for a followup event? */
@@ -1392,6 +1433,40 @@ static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
}
/**
+ * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+ struct i40e_nvm_access *cmd,
+ u8 *bytes, int *perrno)
+{
+ u32 aq_total_len;
+ u32 aq_desc_len;
+
+ i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+
+ aq_desc_len = sizeof(struct i40e_aq_desc);
+ aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen);
+
+ /* check copylength range */
+ if (cmd->data_size > aq_total_len) {
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "%s: copy length %d too big, trimming to %d\n",
+ __func__, cmd->data_size, aq_total_len);
+ cmd->data_size = aq_total_len;
+ }
+
+ memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size);
+
+ return 0;
+}
+
+/**
* i40e_nvmupd_nvm_read - Read NVM
* @hw: pointer to hardware structure
* @cmd: pointer to nvm update command buffer
@@ -1486,18 +1561,20 @@ static i40e_status i40e_nvmupd_nvm_write(struct i40e_hw *hw,
i40e_status status = 0;
struct i40e_asq_cmd_details cmd_details;
u8 module, transaction;
+ u8 preservation_flags;
bool last;
transaction = i40e_nvmupd_get_transaction(cmd->config);
module = i40e_nvmupd_get_module(cmd->config);
last = (transaction & I40E_NVM_LCB);
+ preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config);
memset(&cmd_details, 0, sizeof(cmd_details));
cmd_details.wb_desc = &hw->nvm_wb_desc;
status = i40e_aq_update_nvm(hw, module, cmd->offset,
(u16)cmd->data_size, bytes, last,
- &cmd_details);
+ preservation_flags, &cmd_details);
if (status) {
i40e_debug(hw, I40E_DEBUG_NVM,
"i40e_nvmupd_nvm_write mod 0x%x off 0x%x len 0x%x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index b3cc89c..187dd53 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -214,7 +214,7 @@ i40e_status i40e_aq_discover_capabilities(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
u32 offset, u16 length, void *data,
- bool last_command,
+ bool last_command, u8 preservation_flags,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
u8 mib_type, void *buff, u16 buff_size,
@@ -333,7 +333,9 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
u8 *bytes, int *);
-void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode);
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
+ struct i40e_aq_desc *desc);
+void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw);
void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 40edb6e..8d22758 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -726,6 +726,59 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring)
return 0;
}
+/**
+ * i40e_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi: pointer to vsi struct with tx queues
+ *
+ * VSI has netdev and netdev has TX queues. This function is to check each of
+ * those TX queues if they are hung, trigger recovery by issuing SW interrupt.
+ **/
+void i40e_detect_recover_hung(struct i40e_vsi *vsi)
+{
+ struct i40e_ring *tx_ring = NULL;
+ struct net_device *netdev;
+ unsigned int i;
+ int packets;
+
+ if (!vsi)
+ return;
+
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ netdev = vsi->netdev;
+ if (!netdev)
+ return;
+
+ if (!netif_carrier_ok(netdev))
+ return;
+
+ for (i = 0; i < vsi->num_queue_pairs; i++) {
+ tx_ring = vsi->tx_rings[i];
+ if (tx_ring && tx_ring->desc) {
+ /* If packet counter has not changed the queue is
+ * likely stalled, so force an interrupt for this
+ * queue.
+ *
+ * prev_pkt_ctr would be negative if there was no
+ * pending work.
+ */
+ packets = tx_ring->stats.packets & INT_MAX;
+ if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
+ i40e_force_wb(vsi, tx_ring->q_vector);
+ continue;
+ }
+
+ /* Memory barrier between read of packet count and call
+ * to i40e_get_tx_pending()
+ */
+ smp_rmb();
+ tx_ring->tx_stats.prev_pkt_ctr =
+ i40e_get_tx_pending(tx_ring) ? packets : -1;
+ }
+ }
+}
+
#define WB_STRIDE 4
/**
@@ -1163,6 +1216,7 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
+ tx_ring->tx_stats.prev_pkt_ctr = -1;
return 0;
err:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 2d08760..d4799b41 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -333,6 +333,7 @@ struct i40e_tx_queue_stats {
u64 tx_done_old;
u64 tx_linearize;
u64 tx_force_wb;
+ int prev_pkt_ctr;
};
struct i40e_rx_queue_stats {
@@ -501,6 +502,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
u32 i40e_get_tx_pending(struct i40e_ring *ring);
+void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 5a708c3..cd294e6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -402,6 +402,7 @@ enum i40e_nvmupd_cmd {
I40E_NVMUPD_STATUS,
I40E_NVMUPD_EXEC_AQ,
I40E_NVMUPD_GET_AQ_RESULT,
+ I40E_NVMUPD_GET_AQ_EVENT,
};
enum i40e_nvmupd_state {
@@ -421,15 +422,21 @@ enum i40e_nvmupd_state {
#define I40E_NVM_MOD_PNT_MASK 0xFF
-#define I40E_NVM_TRANS_SHIFT 8
-#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
-#define I40E_NVM_CON 0x0
-#define I40E_NVM_SNT 0x1
-#define I40E_NVM_LCB 0x2
-#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
-#define I40E_NVM_ERA 0x4
-#define I40E_NVM_CSUM 0x8
-#define I40E_NVM_EXEC 0xf
+#define I40E_NVM_TRANS_SHIFT 8
+#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SHIFT 12
+#define I40E_NVM_PRESERVATION_FLAGS_MASK \
+ (0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SELECTED 0x01
+#define I40E_NVM_PRESERVATION_FLAGS_ALL 0x02
+#define I40E_NVM_CON 0x0
+#define I40E_NVM_SNT 0x1
+#define I40E_NVM_LCB 0x2
+#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
+#define I40E_NVM_ERA 0x4
+#define I40E_NVM_CSUM 0x8
+#define I40E_NVM_AQE 0xe
+#define I40E_NVM_EXEC 0xf
#define I40E_NVM_ADAPT_SHIFT 16
#define I40E_NVM_ADAPT_MASK (0xffff << I40E_NVM_ADAPT_SHIFT)
@@ -611,6 +618,7 @@ struct i40e_hw {
/* state of nvm update process */
enum i40e_nvmupd_state nvmupd_state;
struct i40e_aq_desc nvm_wb_desc;
+ struct i40e_aq_desc nvm_aq_event_desc;
struct i40e_virt_mem nvm_buff;
bool nvm_release_on_done;
u16 nvm_wait_opcode;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 435a112..b0e6454 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -2196,8 +2196,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
*/
struct i40e_aqc_nvm_update {
u8 command_flags;
-#define I40E_AQ_NVM_LAST_CMD 0x01
-#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_LAST_CMD 0x01
+#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT 1
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL 0x01
u8 module_pointer;
__le16 length;
__le32 offset;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 1ba29bb..c7831f7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -148,6 +148,59 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
return 0;
}
+/**
+ * i40evf_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi: pointer to vsi struct with tx queues
+ *
+ * VSI has netdev and netdev has TX queues. This function is to check each of
+ * those TX queues if they are hung, trigger recovery by issuing SW interrupt.
+ **/
+void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
+{
+ struct i40e_ring *tx_ring = NULL;
+ struct net_device *netdev;
+ unsigned int i;
+ int packets;
+
+ if (!vsi)
+ return;
+
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ netdev = vsi->netdev;
+ if (!netdev)
+ return;
+
+ if (!netif_carrier_ok(netdev))
+ return;
+
+ for (i = 0; i < vsi->back->num_active_queues; i++) {
+ tx_ring = &vsi->back->tx_rings[i];
+ if (tx_ring && tx_ring->desc) {
+ /* If packet counter has not changed the queue is
+ * likely stalled, so force an interrupt for this
+ * queue.
+ *
+ * prev_pkt_ctr would be negative if there was no
+ * pending work.
+ */
+ packets = tx_ring->stats.packets & INT_MAX;
+ if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
+ i40evf_force_wb(vsi, tx_ring->q_vector);
+ continue;
+ }
+
+ /* Memory barrier between read of packet count and call
+ * to i40evf_get_tx_pending()
+ */
+ smp_rmb();
+ tx_ring->tx_stats.prev_pkt_ctr =
+ i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+ }
+ }
+}
+
#define WB_STRIDE 4
/**
@@ -469,6 +522,7 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
+ tx_ring->tx_stats.prev_pkt_ctr = -1;
return 0;
err:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 8d26c85..e72f16b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -313,6 +313,7 @@ struct i40e_tx_queue_stats {
u64 tx_done_old;
u64 tx_linearize;
u64 tx_force_wb;
+ int prev_pkt_ctr;
u64 tx_lost_interrupt;
};
@@ -467,6 +468,7 @@ void i40evf_free_rx_resources(struct i40e_ring *rx_ring);
int i40evf_napi_poll(struct napi_struct *napi, int budget);
void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw);
+void i40evf_detect_recover_hung(struct i40e_vsi *vsi);
int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40evf_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 6afc316..54951c8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -361,6 +361,7 @@ enum i40e_nvmupd_cmd {
I40E_NVMUPD_STATUS,
I40E_NVMUPD_EXEC_AQ,
I40E_NVMUPD_GET_AQ_RESULT,
+ I40E_NVMUPD_GET_AQ_EVENT,
};
enum i40e_nvmupd_state {
@@ -380,15 +381,21 @@ enum i40e_nvmupd_state {
#define I40E_NVM_MOD_PNT_MASK 0xFF
-#define I40E_NVM_TRANS_SHIFT 8
-#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
-#define I40E_NVM_CON 0x0
-#define I40E_NVM_SNT 0x1
-#define I40E_NVM_LCB 0x2
-#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
-#define I40E_NVM_ERA 0x4
-#define I40E_NVM_CSUM 0x8
-#define I40E_NVM_EXEC 0xf
+#define I40E_NVM_TRANS_SHIFT 8
+#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SHIFT 12
+#define I40E_NVM_PRESERVATION_FLAGS_MASK \
+ (0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SELECTED 0x01
+#define I40E_NVM_PRESERVATION_FLAGS_ALL 0x02
+#define I40E_NVM_CON 0x0
+#define I40E_NVM_SNT 0x1
+#define I40E_NVM_LCB 0x2
+#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
+#define I40E_NVM_ERA 0x4
+#define I40E_NVM_CSUM 0x8
+#define I40E_NVM_AQE 0xe
+#define I40E_NVM_EXEC 0xf
#define I40E_NVM_ADAPT_SHIFT 16
#define I40E_NVM_ADAPT_MASK (0xffff << I40E_NVM_ADAPT_SHIFT)
@@ -561,6 +568,7 @@ struct i40e_hw {
/* state of nvm update process */
enum i40e_nvmupd_state nvmupd_state;
struct i40e_aq_desc nvm_wb_desc;
+ struct i40e_aq_desc nvm_aq_event_desc;
struct i40e_virt_mem nvm_buff;
bool nvm_release_on_done;
u16 nvm_wait_opcode;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 47040ab..33c0ffc 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -187,6 +187,7 @@ enum i40evf_state_t {
enum i40evf_critical_section_t {
__I40EVF_IN_CRITICAL_TASK, /* cannot be interrupted */
__I40EVF_IN_CLIENT_TASK,
+ __I40EVF_IN_REMOVE_TASK, /* device being removed */
};
/* board specific private data structure */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index f92587a..8934f78 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1716,6 +1716,8 @@ static void i40evf_watchdog_task(struct work_struct *work)
if (adapter->state == __I40EVF_RUNNING)
i40evf_request_stats(adapter);
watchdog_done:
+ if (adapter->state == __I40EVF_RUNNING)
+ i40evf_detect_recover_hung(&adapter->vsi);
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
restart_watchdog:
if (adapter->state == __I40EVF_REMOVE)
@@ -1803,6 +1805,12 @@ static void i40evf_reset_task(struct work_struct *work)
int i = 0, err;
bool running;
+ /* When device is being removed it doesn't make sense to run the reset
+ * task, just return in such a case.
+ */
+ if (test_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section))
+ return;
+
while (test_and_set_bit(__I40EVF_IN_CLIENT_TASK,
&adapter->crit_section))
usleep_range(500, 1000);
@@ -3053,7 +3061,8 @@ static void i40evf_remove(struct pci_dev *pdev)
struct i40evf_mac_filter *f, *ftmp;
struct i40e_hw *hw = &adapter->hw;
int err;
-
+ /* Indicate we are in remove and not to run reset_task */
+ set_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section);
cancel_delayed_work_sync(&adapter->init_task);
cancel_work_sync(&adapter->reset_task);
cancel_delayed_work_sync(&adapter->client_task);
@@ -3088,8 +3097,6 @@ static void i40evf_remove(struct pci_dev *pdev)
if (adapter->watchdog_timer.function)
del_timer_sync(&adapter->watchdog_timer);
- flush_scheduled_work();
-
i40evf_free_rss(adapter);
if (hw->aq.asq.count)
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index feb95b6..50ce0d6 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1001,23 +1001,34 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
if (v_opcode == VIRTCHNL_OP_EVENT) {
struct virtchnl_pf_event *vpe =
(struct virtchnl_pf_event *)msg;
+ bool link_up = vpe->event_data.link_event.link_status;
switch (vpe->event) {
case VIRTCHNL_EVENT_LINK_CHANGE:
adapter->link_speed =
vpe->event_data.link_event.link_speed;
- if (adapter->link_up !=
- vpe->event_data.link_event.link_status) {
- adapter->link_up =
- vpe->event_data.link_event.link_status;
- if (adapter->link_up) {
- netif_tx_start_all_queues(netdev);
- netif_carrier_on(netdev);
- } else {
- netif_tx_stop_all_queues(netdev);
- netif_carrier_off(netdev);
- }
- i40evf_print_link_message(adapter);
+
+ /* we've already got the right link status, bail */
+ if (adapter->link_up == link_up)
+ break;
+
+ /* If we get link up message and start queues before
+ * our queues are configured it will trigger a TX hang.
+ * In that case, just ignore the link status message,
+ * we'll get another one after we enable queues and
+ * actually prepared to send traffic.
+ */
+ if (link_up && adapter->state != __I40EVF_RUNNING)
+ break;
+
+ adapter->link_up = link_up;
+ if (link_up) {
+ netif_tx_start_all_queues(netdev);
+ netif_carrier_on(netdev);
+ } else {
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
}
+ i40evf_print_link_message(adapter);
break;
case VIRTCHNL_EVENT_RESET_IMPENDING:
dev_info(&adapter->pdev->dev, "PF reset warning received\n");
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 9284569..1c6b8d91 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -690,6 +690,7 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
void igb_set_flag_queue_pairs(struct igb_adapter *, const u32);
+unsigned int igb_get_max_rss_queues(struct igb_adapter *);
#ifdef CONFIG_IGB_HWMON
void igb_sysfs_exit(struct igb_adapter *adapter);
int igb_sysfs_init(struct igb_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index d06a8db..606e676 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -3338,37 +3338,7 @@ static int igb_set_rxfh(struct net_device *netdev, const u32 *indir,
static unsigned int igb_max_channels(struct igb_adapter *adapter)
{
- struct e1000_hw *hw = &adapter->hw;
- unsigned int max_combined = 0;
-
- switch (hw->mac.type) {
- case e1000_i211:
- max_combined = IGB_MAX_RX_QUEUES_I211;
- break;
- case e1000_82575:
- case e1000_i210:
- max_combined = IGB_MAX_RX_QUEUES_82575;
- break;
- case e1000_i350:
- if (!!adapter->vfs_allocated_count) {
- max_combined = 1;
- break;
- }
- /* fall through */
- case e1000_82576:
- if (!!adapter->vfs_allocated_count) {
- max_combined = 2;
- break;
- }
- /* fall through */
- case e1000_82580:
- case e1000_i354:
- default:
- max_combined = IGB_MAX_RX_QUEUES;
- break;
- }
-
- return max_combined;
+ return igb_get_max_rss_queues(adapter);
}
static void igb_get_channels(struct net_device *netdev,
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c208753..b88fae7 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1744,6 +1744,20 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
* value = idleSlope * 61034
* ----------------- (E6)
* 1000000
+ *
+ * NOTE: For i210, given the above, we can see that idleslope
+ * is represented in 16.38431 kbps units by the value at
+ * the TQAVCC register (1Gbps / 61034), which reduces
+ * the granularity for idleslope increments.
+ * For instance, if you want to configure a 2576kbps
+ * idleslope, the value to be written on the register
+ * would have to be 157.23. If rounded down, you end
+ * up with less bandwidth available than originally
+ * required (~2572 kbps). If rounded up, you end up
+ * with a higher bandwidth (~2589 kbps). Below the
+ * approach we take is to always round up the
+ * calculated value, so the resulting bandwidth might
+ * be slightly higher for some configurations.
*/
value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
@@ -3200,8 +3214,6 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs)
/* if allocation failed then we do not support SR-IOV */
if (!adapter->vf_data) {
adapter->vfs_allocated_count = 0;
- dev_err(&pdev->dev,
- "Unable to allocate memory for VF Data Storage\n");
err = -ENOMEM;
goto out;
}
@@ -3373,10 +3385,10 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
#endif /* CONFIG_PCI_IOV */
}
-static void igb_init_queue_configuration(struct igb_adapter *adapter)
+unsigned int igb_get_max_rss_queues(struct igb_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
- u32 max_rss_queues;
+ unsigned int max_rss_queues;
/* Determine the maximum number of RSS queues supported. */
switch (hw->mac.type) {
@@ -3407,6 +3419,14 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter)
break;
}
+ return max_rss_queues;
+}
+
+static void igb_init_queue_configuration(struct igb_adapter *adapter)
+{
+ u32 max_rss_queues;
+
+ max_rss_queues = igb_get_max_rss_queues(adapter);
adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
igb_set_flag_queue_pairs(adapter, max_rss_queues);
@@ -3676,7 +3696,7 @@ static int __igb_close(struct net_device *netdev, bool suspending)
int igb_close(struct net_device *netdev)
{
- if (netif_device_present(netdev))
+ if (netif_device_present(netdev) || netdev->dismantle)
return __igb_close(netdev, false);
return 0;
}
@@ -8718,7 +8738,8 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index)
/* Indicate to hardware the Address is Valid. */
if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) {
- rar_high |= E1000_RAH_AV;
+ if (is_valid_ether_addr(addr))
+ rar_high |= E1000_RAH_AV;
if (hw->mac.type == e1000_82575)
rar_high |= E1000_RAH_POOL_1 *
@@ -8756,17 +8777,36 @@ static int igb_set_vf_mac(struct igb_adapter *adapter,
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
struct igb_adapter *adapter = netdev_priv(netdev);
- if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
+
+ if (vf >= adapter->vfs_allocated_count)
+ return -EINVAL;
+
+ /* Setting the VF MAC to 0 reverts the IGB_VF_FLAG_PF_SET_MAC
+ * flag and allows to overwrite the MAC via VF netdev. This
+ * is necessary to allow libvirt a way to restore the original
+ * MAC after unbinding vfio-pci and reloading igbvf after shutting
+ * down a VM.
+ */
+ if (is_zero_ether_addr(mac)) {
+ adapter->vf_data[vf].flags &= ~IGB_VF_FLAG_PF_SET_MAC;
+ dev_info(&adapter->pdev->dev,
+ "remove administratively set MAC on VF %d\n",
+ vf);
+ } else if (is_valid_ether_addr(mac)) {
+ adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
+ dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n",
+ mac, vf);
+ dev_info(&adapter->pdev->dev,
+ "Reload the VF driver to make this change effective.");
+ /* Generate additional warning if PF is down */
+ if (test_bit(__IGB_DOWN, &adapter->state)) {
+ dev_warn(&adapter->pdev->dev,
+ "The VF MAC address has been set, but the PF device is not up.\n");
+ dev_warn(&adapter->pdev->dev,
+ "Bring the PF device up before attempting to use the VF device.\n");
+ }
+ } else {
return -EINVAL;
- adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
- dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
- dev_info(&adapter->pdev->dev,
- "Reload the VF driver to make this change effective.");
- if (test_bit(__IGB_DOWN, &adapter->state)) {
- dev_warn(&adapter->pdev->dev,
- "The VF MAC address has been set, but the PF device is not up.\n");
- dev_warn(&adapter->pdev->dev,
- "Bring the PF device up before attempting to use the VF device.\n");
}
return igb_set_vf_mac(adapter, vf, mac);
}
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 841c2a0..0746b19 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -643,6 +643,10 @@ static void igb_ptp_tx_work(struct work_struct *work)
adapter->ptp_tx_skb = NULL;
clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
adapter->tx_hwtstamp_timeouts++;
+ /* Clear the tx valid bit in TSYNCTXCTL register to enable
+ * interrupt
+ */
+ rd32(E1000_TXSTMPH);
dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
return;
}
@@ -717,6 +721,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter)
*/
void igb_ptp_tx_hang(struct igb_adapter *adapter)
{
+ struct e1000_hw *hw = &adapter->hw;
bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
IGB_PTP_TX_TIMEOUT);
@@ -736,6 +741,10 @@ void igb_ptp_tx_hang(struct igb_adapter *adapter)
adapter->ptp_tx_skb = NULL;
clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
adapter->tx_hwtstamp_timeouts++;
+ /* Clear the tx valid bit in TSYNCTXCTL register to enable
+ * interrupt
+ */
+ rd32(E1000_TXSTMPH);
dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
}
}
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 35e6fa6..8319465 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -42,3 +42,4 @@ ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \
ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
+ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 49ab0c7..c1e3a00 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -52,6 +52,7 @@
#ifdef CONFIG_IXGBE_DCA
#include <linux/dca.h>
#endif
+#include "ixgbe_ipsec.h"
#include <net/xdp.h>
#include <net/busy_poll.h>
@@ -171,10 +172,11 @@ enum ixgbe_tx_flags {
IXGBE_TX_FLAGS_CC = 0x08,
IXGBE_TX_FLAGS_IPV4 = 0x10,
IXGBE_TX_FLAGS_CSUM = 0x20,
+ IXGBE_TX_FLAGS_IPSEC = 0x40,
/* software defined flags */
- IXGBE_TX_FLAGS_SW_VLAN = 0x40,
- IXGBE_TX_FLAGS_FCOE = 0x80,
+ IXGBE_TX_FLAGS_SW_VLAN = 0x80,
+ IXGBE_TX_FLAGS_FCOE = 0x100,
};
/* VLAN info */
@@ -629,15 +631,18 @@ struct ixgbe_adapter {
#define IXGBE_FLAG2_EEE_CAPABLE BIT(14)
#define IXGBE_FLAG2_EEE_ENABLED BIT(15)
#define IXGBE_FLAG2_RX_LEGACY BIT(16)
+#define IXGBE_FLAG2_IPSEC_ENABLED BIT(17)
/* Tx fast path data */
int num_tx_queues;
u16 tx_itr_setting;
u16 tx_work_limit;
+ u64 tx_ipsec;
/* Rx fast path data */
int num_rx_queues;
u16 rx_itr_setting;
+ u64 rx_ipsec;
/* Port number used to identify VXLAN traffic */
__be16 vxlan_port;
@@ -781,6 +786,10 @@ struct ixgbe_adapter {
#define IXGBE_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */
u32 *rss_key;
+
+#ifdef CONFIG_XFRM
+ struct ixgbe_ipsec *ipsec;
+#endif /* CONFIG_XFRM */
};
static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
@@ -1011,4 +1020,24 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter);
void ixgbe_store_reta(struct ixgbe_adapter *adapter);
s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
+#ifdef CONFIG_XFRM_OFFLOAD
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb);
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd);
+#else
+static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb) { };
+static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+ struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd) { return 0; };
+#endif /* CONFIG_XFRM_OFFLOAD */
#endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 7ac7ef9..61188f3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -4087,7 +4087,7 @@ void ixgbe_get_oem_prod_version(struct ixgbe_hw *hw,
hw->eeprom.ops.read(hw, NVM_OEM_PROD_VER_PTR, &offset);
/* Return is offset to OEM Product Version block is invalid */
- if (offset == 0x0 && offset == NVM_INVALID_PTR)
+ if (offset == 0x0 || offset == NVM_INVALID_PTR)
return;
/* Read product version block */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index f064099..221f158 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -115,6 +115,8 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
{"tx_hwtstamp_timeouts", IXGBE_STAT(tx_hwtstamp_timeouts)},
{"tx_hwtstamp_skipped", IXGBE_STAT(tx_hwtstamp_skipped)},
{"rx_hwtstamp_cleared", IXGBE_STAT(rx_hwtstamp_cleared)},
+ {"tx_ipsec", IXGBE_STAT(tx_ipsec)},
+ {"rx_ipsec", IXGBE_STAT(rx_ipsec)},
#ifdef IXGBE_FCOE
{"fcoe_bad_fccrc", IXGBE_STAT(stats.fccrc)},
{"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)},
@@ -3083,26 +3085,9 @@ static int ixgbe_get_ts_info(struct net_device *dev,
case ixgbe_mac_X550EM_x:
case ixgbe_mac_x550em_a:
info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL);
- /* fallthrough */
+ break;
case ixgbe_mac_X540:
case ixgbe_mac_82599EB:
- info->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE |
- SOF_TIMESTAMPING_TX_HARDWARE |
- SOF_TIMESTAMPING_RX_HARDWARE |
- SOF_TIMESTAMPING_RAW_HARDWARE;
-
- if (adapter->ptp_clock)
- info->phc_index = ptp_clock_index(adapter->ptp_clock);
- else
- info->phc_index = -1;
-
- info->tx_types =
- BIT(HWTSTAMP_TX_OFF) |
- BIT(HWTSTAMP_TX_ON);
-
info->rx_filters |=
BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
@@ -3111,6 +3096,24 @@ static int ixgbe_get_ts_info(struct net_device *dev,
default:
return ethtool_op_get_ts_info(dev, info);
}
+
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ if (adapter->ptp_clock)
+ info->phc_index = ptp_clock_index(adapter->ptp_clock);
+ else
+ info->phc_index = -1;
+
+ info->tx_types =
+ BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
new file mode 100644
index 0000000..93eacdd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -0,0 +1,941 @@
+/*******************************************************************************
+ *
+ * Intel 10 Gigabit PCI Express Linux driver
+ * Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * Linux NICS <linux.nics@intel.com>
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "ixgbe.h"
+#include <net/xfrm.h>
+#include <crypto/aead.h>
+
+/**
+ * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @key: key byte array
+ * @salt: salt bytes
+ **/
+static void ixgbe_ipsec_set_tx_sa(struct ixgbe_hw *hw, u16 idx,
+ u32 key[], u32 salt)
+{
+ u32 reg;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(i), cpu_to_be32(key[3 - i]));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, cpu_to_be32(salt));
+ IXGBE_WRITE_FLUSH(hw);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_IPSTXIDX);
+ reg &= IXGBE_RXTXIDX_IPS_EN;
+ reg |= idx << IXGBE_RXTXIDX_IDX_SHIFT | IXGBE_RXTXIDX_WRITE;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, reg);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_item - set an Rx table item
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @tbl: table selector
+ *
+ * Trigger the device to store into a particular Rx table the
+ * data that has already been loaded into the input register
+ **/
+static void ixgbe_ipsec_set_rx_item(struct ixgbe_hw *hw, u16 idx,
+ enum ixgbe_ipsec_tbl_sel tbl)
+{
+ u32 reg;
+
+ reg = IXGBE_READ_REG(hw, IXGBE_IPSRXIDX);
+ reg &= IXGBE_RXTXIDX_IPS_EN;
+ reg |= tbl << IXGBE_RXIDX_TBL_SHIFT |
+ idx << IXGBE_RXTXIDX_IDX_SHIFT |
+ IXGBE_RXTXIDX_WRITE;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, reg);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_sa - set up the register bits to save SA info
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @spi: security parameter index
+ * @key: key byte array
+ * @salt: salt bytes
+ * @mode: rx decrypt control bits
+ * @ip_idx: index into IP table for related IP address
+ **/
+static void ixgbe_ipsec_set_rx_sa(struct ixgbe_hw *hw, u16 idx, __be32 spi,
+ u32 key[], u32 salt, u32 mode, u32 ip_idx)
+{
+ int i;
+
+ /* store the SPI (in bigendian) and IPidx */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, cpu_to_le32(spi));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, ip_idx);
+ IXGBE_WRITE_FLUSH(hw);
+
+ ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_spi_tbl);
+
+ /* store the key, salt, and mode */
+ for (i = 0; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(i), cpu_to_be32(key[3 - i]));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, cpu_to_be32(salt));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, mode);
+ IXGBE_WRITE_FLUSH(hw);
+
+ ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_key_tbl);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_ip - set up the register bits to save SA IP addr info
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @addr: IP address byte array
+ **/
+static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
+{
+ int i;
+
+ /* store the ip address */
+ for (i = 0; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(i), cpu_to_le32(addr[i]));
+ IXGBE_WRITE_FLUSH(hw);
+
+ ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_ip_tbl);
+}
+
+/**
+ * ixgbe_ipsec_clear_hw_tables - because some tables don't get cleared on reset
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 buf[4] = {0, 0, 0, 0};
+ u16 idx;
+
+ /* disable Rx and Tx SA lookup */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+
+ /* scrub the tables - split the loops for the max of the IP table */
+ for (idx = 0; idx < IXGBE_IPSEC_MAX_RX_IP_COUNT; idx++) {
+ ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+ ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+ ixgbe_ipsec_set_rx_ip(hw, idx, (__be32 *)buf);
+ }
+ for (; idx < IXGBE_IPSEC_MAX_SA_COUNT; idx++) {
+ ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+ ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+ }
+
+ ipsec->num_rx_sa = 0;
+ ipsec->num_tx_sa = 0;
+}
+
+/**
+ * ixgbe_ipsec_stop_data
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_data(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ bool link = adapter->link_up;
+ u32 t_rdy, r_rdy;
+ u32 limit;
+ u32 reg;
+
+ /* halt data paths */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+ reg |= IXGBE_SECTXCTRL_TX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+ reg |= IXGBE_SECRXCTRL_RX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* If the tx fifo doesn't have link, but still has data,
+ * we can't clear the tx sec block. Set the MAC loopback
+ * before block clear
+ */
+ if (!link) {
+ reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+ reg |= IXGBE_MACC_FLU;
+ IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ reg |= IXGBE_HLREG0_LPBK;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+ IXGBE_WRITE_FLUSH(hw);
+ mdelay(3);
+ }
+
+ /* wait for the paths to empty */
+ limit = 20;
+ do {
+ mdelay(10);
+ t_rdy = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) &
+ IXGBE_SECTXSTAT_SECTX_RDY;
+ r_rdy = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) &
+ IXGBE_SECRXSTAT_SECRX_RDY;
+ } while (!t_rdy && !r_rdy && limit--);
+
+ /* undo loopback if we played with it earlier */
+ if (!link) {
+ reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+ reg &= ~IXGBE_MACC_FLU;
+ IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ reg &= ~IXGBE_HLREG0_LPBK;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+ IXGBE_WRITE_FLUSH(hw);
+ }
+}
+
+/**
+ * ixgbe_ipsec_stop_engine
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_engine(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 reg;
+
+ ixgbe_ipsec_stop_data(adapter);
+
+ /* disable Rx and Tx SA lookup */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+
+ /* disable the Rx and Tx engines and full packet store-n-forward */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+ reg |= IXGBE_SECTXCTRL_SECTX_DIS;
+ reg &= ~IXGBE_SECTXCTRL_STORE_FORWARD;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+ reg |= IXGBE_SECRXCTRL_SECRX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+ /* restore the "tx security buffer almost full threshold" to 0x250 */
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, 0x250);
+
+ /* Set minimum IFG between packets back to the default 0x1 */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+ reg = (reg & 0xfffffff0) | 0x1;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+ /* final set for normal (no ipsec offload) processing */
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_SECTX_DIS);
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, IXGBE_SECRXCTRL_SECRX_DIS);
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_start_engine
+ * @adapter: board private structure
+ *
+ * NOTE: this increases power consumption whether being used or not
+ **/
+static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 reg;
+
+ ixgbe_ipsec_stop_data(adapter);
+
+ /* Set minimum IFG between packets to 3 */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+ reg = (reg & 0xfffffff0) | 0x3;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+ /* Set "tx security buffer almost full threshold" to 0x15 so that the
+ * almost full indication is generated only after buffer contains at
+ * least an entire jumbo packet.
+ */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF);
+ reg = (reg & 0xfffffc00) | 0x15;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, reg);
+
+ /* restart the data paths by clearing the DISABLE bits */
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_STORE_FORWARD);
+
+ /* enable Rx and Tx SA lookup */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, IXGBE_RXTXIDX_IPS_EN);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, IXGBE_RXTXIDX_IPS_EN);
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset
+ * @adapter: board private structure
+ **/
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i;
+
+ if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED))
+ return;
+
+ /* clean up and restart the engine */
+ ixgbe_ipsec_stop_engine(adapter);
+ ixgbe_ipsec_clear_hw_tables(adapter);
+ ixgbe_ipsec_start_engine(adapter);
+
+ /* reload the IP addrs */
+ for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
+ struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
+
+ if (ipsa->used)
+ ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
+ }
+
+ /* reload the Rx and Tx keys */
+ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+ struct rx_sa *rsa = &ipsec->rx_tbl[i];
+ struct tx_sa *tsa = &ipsec->tx_tbl[i];
+
+ if (rsa->used)
+ ixgbe_ipsec_set_rx_sa(hw, i, rsa->xs->id.spi,
+ rsa->key, rsa->salt,
+ rsa->mode, rsa->iptbl_ind);
+
+ if (tsa->used)
+ ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt);
+ }
+}
+
+/**
+ * ixgbe_ipsec_find_empty_idx - find the first unused security parameter index
+ * @ipsec: pointer to ipsec struct
+ * @rxtable: true if we need to look in the Rx table
+ *
+ * Returns the first unused index in either the Rx or Tx SA table
+ **/
+static int ixgbe_ipsec_find_empty_idx(struct ixgbe_ipsec *ipsec, bool rxtable)
+{
+ u32 i;
+
+ if (rxtable) {
+ if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+ return -ENOSPC;
+
+ /* search rx sa table */
+ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+ if (!ipsec->rx_tbl[i].used)
+ return i;
+ }
+ } else {
+ if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+ return -ENOSPC;
+
+ /* search tx sa table */
+ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+ if (!ipsec->tx_tbl[i].used)
+ return i;
+ }
+ }
+
+ return -ENOSPC;
+}
+
+/**
+ * ixgbe_ipsec_find_rx_state - find the state that matches
+ * @ipsec: pointer to ipsec struct
+ * @daddr: inbound address to match
+ * @proto: protocol to match
+ * @spi: SPI to match
+ * @ip4: true if using an ipv4 address
+ *
+ * Returns a pointer to the matching SA state information
+ **/
+static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
+ __be32 *daddr, u8 proto,
+ __be32 spi, bool ip4)
+{
+ struct rx_sa *rsa;
+ struct xfrm_state *ret = NULL;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist, spi)
+ if (spi == rsa->xs->id.spi &&
+ ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
+ (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
+ sizeof(rsa->xs->id.daddr.a6)))) &&
+ proto == rsa->xs->id.proto) {
+ ret = rsa->xs;
+ xfrm_state_hold(ret);
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol
+ * @xs: pointer to xfrm_state struct
+ * @mykey: pointer to key array to populate
+ * @mysalt: pointer to salt value to populate
+ *
+ * This copies the protocol keys and salt to our own data tables. The
+ * 82599 family only supports the one algorithm.
+ **/
+static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
+ u32 *mykey, u32 *mysalt)
+{
+ struct net_device *dev = xs->xso.dev;
+ unsigned char *key_data;
+ char *alg_name = NULL;
+ const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+ int key_len;
+
+ if (xs->aead) {
+ key_data = &xs->aead->alg_key[0];
+ key_len = xs->aead->alg_key_len;
+ alg_name = xs->aead->alg_name;
+ } else {
+ netdev_err(dev, "Unsupported IPsec algorithm\n");
+ return -EINVAL;
+ }
+
+ if (strcmp(alg_name, aes_gcm_name)) {
+ netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+ aes_gcm_name);
+ return -EINVAL;
+ }
+
+ /* The key bytes come down in a bigendian array of bytes, so
+ * we don't need to do any byteswapping.
+ * 160 accounts for 16 byte key and 4 byte salt
+ */
+ if (key_len == 160) {
+ *mysalt = ((u32 *)key_data)[4];
+ } else if (key_len != 128) {
+ netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
+ return -EINVAL;
+ } else {
+ netdev_info(dev, "IPsec hw offload parameters missing 32 bit salt value\n");
+ *mysalt = 0;
+ }
+ memcpy(mykey, key_data, 16);
+
+ return 0;
+}
+
+/**
+ * ixgbe_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ **/
+static int ixgbe_ipsec_add_sa(struct xfrm_state *xs)
+{
+ struct net_device *dev = xs->xso.dev;
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int checked, match, first;
+ u16 sa_idx;
+ int ret;
+ int i;
+
+ if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+ netdev_err(dev, "Unsupported protocol 0x%04x for ipsec offload\n",
+ xs->id.proto);
+ return -EINVAL;
+ }
+
+ if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ struct rx_sa rsa;
+
+ if (xs->calg) {
+ netdev_err(dev, "Compression offload not supported\n");
+ return -EINVAL;
+ }
+
+ /* find the first unused index */
+ ret = ixgbe_ipsec_find_empty_idx(ipsec, true);
+ if (ret < 0) {
+ netdev_err(dev, "No space for SA in Rx table!\n");
+ return ret;
+ }
+ sa_idx = (u16)ret;
+
+ memset(&rsa, 0, sizeof(rsa));
+ rsa.used = true;
+ rsa.xs = xs;
+
+ if (rsa.xs->id.proto & IPPROTO_ESP)
+ rsa.decrypt = xs->ealg || xs->aead;
+
+ /* get the key and salt */
+ ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
+ if (ret) {
+ netdev_err(dev, "Failed to get key data for Rx SA table\n");
+ return ret;
+ }
+
+ /* get ip for rx sa table */
+ if (xs->props.family == AF_INET6)
+ memcpy(rsa.ipaddr, &xs->id.daddr.a6, 16);
+ else
+ memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4);
+
+ /* The HW does not have a 1:1 mapping from keys to IP addrs, so
+ * check for a matching IP addr entry in the table. If the addr
+ * already exists, use it; else find an unused slot and add the
+ * addr. If one does not exist and there are no unused table
+ * entries, fail the request.
+ */
+
+ /* Find an existing match or first not used, and stop looking
+ * after we've checked all we know we have.
+ */
+ checked = 0;
+ match = -1;
+ first = -1;
+ for (i = 0;
+ i < IXGBE_IPSEC_MAX_RX_IP_COUNT &&
+ (checked < ipsec->num_rx_sa || first < 0);
+ i++) {
+ if (ipsec->ip_tbl[i].used) {
+ if (!memcmp(ipsec->ip_tbl[i].ipaddr,
+ rsa.ipaddr, sizeof(rsa.ipaddr))) {
+ match = i;
+ break;
+ }
+ checked++;
+ } else if (first < 0) {
+ first = i; /* track the first empty seen */
+ }
+ }
+
+ if (ipsec->num_rx_sa == 0)
+ first = 0;
+
+ if (match >= 0) {
+ /* addrs are the same, we should use this one */
+ rsa.iptbl_ind = match;
+ ipsec->ip_tbl[match].ref_cnt++;
+
+ } else if (first >= 0) {
+ /* no matches, but here's an empty slot */
+ rsa.iptbl_ind = first;
+
+ memcpy(ipsec->ip_tbl[first].ipaddr,
+ rsa.ipaddr, sizeof(rsa.ipaddr));
+ ipsec->ip_tbl[first].ref_cnt = 1;
+ ipsec->ip_tbl[first].used = true;
+
+ ixgbe_ipsec_set_rx_ip(hw, rsa.iptbl_ind, rsa.ipaddr);
+
+ } else {
+ /* no match and no empty slot */
+ netdev_err(dev, "No space for SA in Rx IP SA table\n");
+ memset(&rsa, 0, sizeof(rsa));
+ return -ENOSPC;
+ }
+
+ rsa.mode = IXGBE_RXMOD_VALID;
+ if (rsa.xs->id.proto & IPPROTO_ESP)
+ rsa.mode |= IXGBE_RXMOD_PROTO_ESP;
+ if (rsa.decrypt)
+ rsa.mode |= IXGBE_RXMOD_DECRYPT;
+ if (rsa.xs->props.family == AF_INET6)
+ rsa.mode |= IXGBE_RXMOD_IPV6;
+
+ /* the preparations worked, so save the info */
+ memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa));
+
+ ixgbe_ipsec_set_rx_sa(hw, sa_idx, rsa.xs->id.spi, rsa.key,
+ rsa.salt, rsa.mode, rsa.iptbl_ind);
+ xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX;
+
+ ipsec->num_rx_sa++;
+
+ /* hash the new entry for faster search in Rx path */
+ hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
+ rsa.xs->id.spi);
+ } else {
+ struct tx_sa tsa;
+
+ /* find the first unused index */
+ ret = ixgbe_ipsec_find_empty_idx(ipsec, false);
+ if (ret < 0) {
+ netdev_err(dev, "No space for SA in Tx table\n");
+ return ret;
+ }
+ sa_idx = (u16)ret;
+
+ memset(&tsa, 0, sizeof(tsa));
+ tsa.used = true;
+ tsa.xs = xs;
+
+ if (xs->id.proto & IPPROTO_ESP)
+ tsa.encrypt = xs->ealg || xs->aead;
+
+ ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
+ if (ret) {
+ netdev_err(dev, "Failed to get key data for Tx SA table\n");
+ memset(&tsa, 0, sizeof(tsa));
+ return ret;
+ }
+
+ /* the preparations worked, so save the info */
+ memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa));
+
+ ixgbe_ipsec_set_tx_sa(hw, sa_idx, tsa.key, tsa.salt);
+
+ xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX;
+
+ ipsec->num_tx_sa++;
+ }
+
+ /* enable the engine if not already warmed up */
+ if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED)) {
+ ixgbe_ipsec_start_engine(adapter);
+ adapter->flags2 |= IXGBE_FLAG2_IPSEC_ENABLED;
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ **/
+static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
+{
+ struct net_device *dev = xs->xso.dev;
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 zerobuf[4] = {0, 0, 0, 0};
+ u16 sa_idx;
+
+ if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ struct rx_sa *rsa;
+ u8 ipi;
+
+ sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+ rsa = &ipsec->rx_tbl[sa_idx];
+
+ if (!rsa->used) {
+ netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n",
+ sa_idx, xs->xso.offload_handle);
+ return;
+ }
+
+ ixgbe_ipsec_set_rx_sa(hw, sa_idx, 0, zerobuf, 0, 0, 0);
+ hash_del_rcu(&rsa->hlist);
+
+ /* if the IP table entry is referenced by only this SA,
+ * i.e. ref_cnt is only 1, clear the IP table entry as well
+ */
+ ipi = rsa->iptbl_ind;
+ if (ipsec->ip_tbl[ipi].ref_cnt > 0) {
+ ipsec->ip_tbl[ipi].ref_cnt--;
+
+ if (!ipsec->ip_tbl[ipi].ref_cnt) {
+ memset(&ipsec->ip_tbl[ipi], 0,
+ sizeof(struct rx_ip_sa));
+ ixgbe_ipsec_set_rx_ip(hw, ipi, zerobuf);
+ }
+ }
+
+ memset(rsa, 0, sizeof(struct rx_sa));
+ ipsec->num_rx_sa--;
+ } else {
+ sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+
+ if (!ipsec->tx_tbl[sa_idx].used) {
+ netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n",
+ sa_idx, xs->xso.offload_handle);
+ return;
+ }
+
+ ixgbe_ipsec_set_tx_sa(hw, sa_idx, zerobuf, 0);
+ memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa));
+ ipsec->num_tx_sa--;
+ }
+
+ /* if there are no SAs left, stop the engine to save energy */
+ if (ipsec->num_rx_sa == 0 && ipsec->num_tx_sa == 0) {
+ adapter->flags2 &= ~IXGBE_FLAG2_IPSEC_ENABLED;
+ ixgbe_ipsec_stop_engine(adapter);
+ }
+}
+
+/**
+ * ixgbe_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+ if (xs->props.family == AF_INET) {
+ /* Offload with IPv4 options is not supported yet */
+ if (ip_hdr(skb)->ihl != 5)
+ return false;
+ } else {
+ /* Offload with IPv6 extension headers is not support yet */
+ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ixgbe_ipsec_free - called by xfrm garbage collections
+ * @xs: pointer to transformer state struct
+ *
+ * We don't have any garbage to collect, so we shouldn't bother
+ * implementing this function, but the XFRM code doesn't check for
+ * existence before calling the API callback.
+ **/
+static void ixgbe_ipsec_free(struct xfrm_state *xs)
+{
+}
+
+static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
+ .xdo_dev_state_add = ixgbe_ipsec_add_sa,
+ .xdo_dev_state_delete = ixgbe_ipsec_del_sa,
+ .xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
+ .xdo_dev_state_free = ixgbe_ipsec_free,
+};
+
+/**
+ * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
+ * @tx_ring: outgoing context
+ * @first: current data packet
+ * @itd: ipsec Tx data for later use in building context descriptor
+ **/
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+ struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct xfrm_state *xs;
+ struct tx_sa *tsa;
+
+ if (unlikely(!first->skb->sp->len)) {
+ netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
+ __func__, first->skb->sp->len);
+ return 0;
+ }
+
+ xs = xfrm_input_state(first->skb);
+ if (unlikely(!xs)) {
+ netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
+ __func__, xs);
+ return 0;
+ }
+
+ itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+ if (unlikely(itd->sa_idx > IXGBE_IPSEC_MAX_SA_COUNT)) {
+ netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
+ __func__, itd->sa_idx, xs->xso.offload_handle);
+ return 0;
+ }
+
+ tsa = &ipsec->tx_tbl[itd->sa_idx];
+ if (unlikely(!tsa->used)) {
+ netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
+ __func__, itd->sa_idx);
+ return 0;
+ }
+
+ first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
+
+ itd->flags = 0;
+ if (xs->id.proto == IPPROTO_ESP) {
+ itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+ IXGBE_ADVTXD_TUCMD_L4T_TCP;
+ if (first->protocol == htons(ETH_P_IP))
+ itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
+ itd->trailer_len = xs->props.trailer_len;
+ }
+ if (tsa->encrypt)
+ itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
+
+ return 1;
+}
+
+/**
+ * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor
+ * @rx_ring: receiving ring
+ * @rx_desc: receive data descriptor
+ * @skb: current data packet
+ *
+ * Determine if there was an ipsec encapsulation noticed, and if so set up
+ * the resulting status for later in the receive stack.
+ **/
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev);
+ __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+ __le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
+ IXGBE_RXDADV_PKTTYPE_IPSEC_ESP);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct xfrm_offload *xo = NULL;
+ struct xfrm_state *xs = NULL;
+ struct ipv6hdr *ip6 = NULL;
+ struct iphdr *ip4 = NULL;
+ void *daddr;
+ __be32 spi;
+ u8 *c_hdr;
+ u8 proto;
+
+ /* Find the ip and crypto headers in the data.
+ * We can assume no vlan header in the way, b/c the
+ * hw won't recognize the IPsec packet and anyway the
+ * currently vlan device doesn't support xfrm offload.
+ */
+ if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV4)) {
+ ip4 = (struct iphdr *)(skb->data + ETH_HLEN);
+ daddr = &ip4->daddr;
+ c_hdr = (u8 *)ip4 + ip4->ihl * 4;
+ } else if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV6)) {
+ ip6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+ daddr = &ip6->daddr;
+ c_hdr = (u8 *)ip6 + sizeof(struct ipv6hdr);
+ } else {
+ return;
+ }
+
+ switch (pkt_info & ipsec_pkt_types) {
+ case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH):
+ spi = ((struct ip_auth_hdr *)c_hdr)->spi;
+ proto = IPPROTO_AH;
+ break;
+ case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_ESP):
+ spi = ((struct ip_esp_hdr *)c_hdr)->spi;
+ proto = IPPROTO_ESP;
+ break;
+ default:
+ return;
+ }
+
+ xs = ixgbe_ipsec_find_rx_state(ipsec, daddr, proto, spi, !!ip4);
+ if (unlikely(!xs))
+ return;
+
+ skb->sp = secpath_dup(skb->sp);
+ if (unlikely(!skb->sp))
+ return;
+
+ skb->sp->xvec[skb->sp->len++] = xs;
+ skb->sp->olen++;
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ xo->status = CRYPTO_SUCCESS;
+
+ adapter->rx_ipsec++;
+}
+
+/**
+ * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
+ * @adapter: board private structure
+ **/
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec;
+ size_t size;
+
+ if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+ return;
+
+ ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+ if (!ipsec)
+ goto err1;
+ hash_init(ipsec->rx_sa_list);
+
+ size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+ ipsec->rx_tbl = kzalloc(size, GFP_KERNEL);
+ if (!ipsec->rx_tbl)
+ goto err2;
+
+ size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+ ipsec->tx_tbl = kzalloc(size, GFP_KERNEL);
+ if (!ipsec->tx_tbl)
+ goto err2;
+
+ size = sizeof(struct rx_ip_sa) * IXGBE_IPSEC_MAX_RX_IP_COUNT;
+ ipsec->ip_tbl = kzalloc(size, GFP_KERNEL);
+ if (!ipsec->ip_tbl)
+ goto err2;
+
+ ipsec->num_rx_sa = 0;
+ ipsec->num_tx_sa = 0;
+
+ adapter->ipsec = ipsec;
+ ixgbe_ipsec_stop_engine(adapter);
+ ixgbe_ipsec_clear_hw_tables(adapter);
+
+ adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+ adapter->netdev->features |= NETIF_F_HW_ESP;
+ adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+ return;
+
+err2:
+ kfree(ipsec->ip_tbl);
+ kfree(ipsec->rx_tbl);
+ kfree(ipsec->tx_tbl);
+err1:
+ kfree(adapter->ipsec);
+ netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
+}
+
+/**
+ * ixgbe_stop_ipsec_offload - tear down the ipsec offload
+ * @adapter: board private structure
+ **/
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+
+ adapter->ipsec = NULL;
+ if (ipsec) {
+ kfree(ipsec->ip_tbl);
+ kfree(ipsec->rx_tbl);
+ kfree(ipsec->tx_tbl);
+ kfree(ipsec);
+ }
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
new file mode 100644
index 0000000..da3ce78
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -0,0 +1,93 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_IPSEC_H_
+#define _IXGBE_IPSEC_H_
+
+#define IXGBE_IPSEC_MAX_SA_COUNT 1024
+#define IXGBE_IPSEC_MAX_RX_IP_COUNT 128
+#define IXGBE_IPSEC_BASE_RX_INDEX 0
+#define IXGBE_IPSEC_BASE_TX_INDEX IXGBE_IPSEC_MAX_SA_COUNT
+
+#define IXGBE_RXTXIDX_IPS_EN 0x00000001
+#define IXGBE_RXIDX_TBL_SHIFT 1
+enum ixgbe_ipsec_tbl_sel {
+ ips_rx_ip_tbl = 0x01,
+ ips_rx_spi_tbl = 0x02,
+ ips_rx_key_tbl = 0x03,
+};
+
+#define IXGBE_RXTXIDX_IDX_SHIFT 3
+#define IXGBE_RXTXIDX_READ 0x40000000
+#define IXGBE_RXTXIDX_WRITE 0x80000000
+
+#define IXGBE_RXMOD_VALID 0x00000001
+#define IXGBE_RXMOD_PROTO_ESP 0x00000004
+#define IXGBE_RXMOD_DECRYPT 0x00000008
+#define IXGBE_RXMOD_IPV6 0x00000010
+
+struct rx_sa {
+ struct hlist_node hlist;
+ struct xfrm_state *xs;
+ __be32 ipaddr[4];
+ u32 key[4];
+ u32 salt;
+ u32 mode;
+ u8 iptbl_ind;
+ bool used;
+ bool decrypt;
+};
+
+struct rx_ip_sa {
+ __be32 ipaddr[4];
+ u32 ref_cnt;
+ bool used;
+};
+
+struct tx_sa {
+ struct xfrm_state *xs;
+ u32 key[4];
+ u32 salt;
+ bool encrypt;
+ bool used;
+};
+
+struct ixgbe_ipsec_tx_data {
+ u32 flags;
+ u16 trailer_len;
+ u16 sa_idx;
+};
+
+struct ixgbe_ipsec {
+ u16 num_rx_sa;
+ u16 num_tx_sa;
+ struct rx_ip_sa *ip_tbl;
+ struct rx_sa *rx_tbl;
+ struct tx_sa *tx_tbl;
+ DECLARE_HASHTABLE(rx_sa_list, 10);
+};
+#endif /* _IXGBE_IPSEC_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index b3c282d..4242f02 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1282,7 +1282,7 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
}
void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
- u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
+ u32 fceof_saidx, u32 type_tucmd, u32 mss_l4len_idx)
{
struct ixgbe_adv_tx_context_desc *context_desc;
u16 i = tx_ring->next_to_use;
@@ -1296,7 +1296,7 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
- context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof);
+ context_desc->fceof_saidx = cpu_to_le32(fceof_saidx);
context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4f28621..0da5aa2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1171,7 +1171,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
struct ixgbe_adapter *adapter = q_vector->adapter;
struct ixgbe_tx_buffer *tx_buffer;
union ixgbe_adv_tx_desc *tx_desc;
- unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
unsigned int budget = q_vector->tx.work_limit;
unsigned int i = tx_ring->next_to_clean;
@@ -1202,6 +1202,8 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
/* update the statistics for this packet */
total_bytes += tx_buffer->bytecount;
total_packets += tx_buffer->gso_segs;
+ if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
+ total_ipsec++;
/* free the skb */
if (ring_is_xdp(tx_ring))
@@ -1264,6 +1266,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
u64_stats_update_end(&tx_ring->syncp);
q_vector->tx.total_bytes += total_bytes;
q_vector->tx.total_packets += total_packets;
+ adapter->tx_ipsec += total_ipsec;
if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
/* schedule immediate reset if we believe we hung */
@@ -1752,6 +1755,9 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
}
+ if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
+ ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
+
skb->protocol = eth_type_trans(skb, dev);
/* record Rx queue, or update MACVLAN statistics */
@@ -4127,11 +4133,15 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
rxdctl &= ~0x3FFFFF;
rxdctl |= 0x080420;
#if (PAGE_SIZE < 8192)
- } else {
+ /* RXDCTL.RLPML does not work on 82599 */
+ } else if (hw->mac.type != ixgbe_mac_82599EB) {
rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
IXGBE_RXDCTL_RLPML_EN);
- /* Limit the maximum frame size so we don't overrun the skb */
+ /* Limit the maximum frame size so we don't overrun the skb.
+ * This can happen in SRIOV mode when the MTU of the VF is
+ * higher than the MTU of the PF.
+ */
if (ring_uses_build_skb(ring) &&
!test_bit(__IXGBE_RX_3K_BUFFER, &ring->state))
rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB |
@@ -5425,6 +5435,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
ixgbe_set_rx_mode(adapter->netdev);
ixgbe_restore_vlan(adapter);
+ ixgbe_ipsec_restore(adapter);
switch (hw->mac.type) {
case ixgbe_mac_82599EB:
@@ -7252,6 +7263,9 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
case IXGBE_LINK_SPEED_10GB_FULL:
speed_str = "10 Gbps";
break;
+ case IXGBE_LINK_SPEED_5GB_FULL:
+ speed_str = "5 Gbps";
+ break;
case IXGBE_LINK_SPEED_2_5GB_FULL:
speed_str = "2.5 Gbps";
break;
@@ -7795,10 +7809,12 @@ static inline bool ixgbe_ipv6_csum_is_sctp(struct sk_buff *skb)
}
static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
- struct ixgbe_tx_buffer *first)
+ struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd)
{
struct sk_buff *skb = first->skb;
u32 vlan_macip_lens = 0;
+ u32 fceof_saidx = 0;
u32 type_tucmd = 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -7839,7 +7855,12 @@ no_csum:
vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
- ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, 0);
+ if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
+ fceof_saidx |= itd->sa_idx;
+ type_tucmd |= itd->flags | itd->trailer_len;
+ }
+
+ ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
}
#define IXGBE_SET_FLAG(_input, _flag, _result) \
@@ -7882,11 +7903,16 @@ static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
IXGBE_TX_FLAGS_CSUM,
IXGBE_ADVTXD_POPTS_TXSM);
- /* enble IPv4 checksum for TSO */
+ /* enable IPv4 checksum for TSO */
olinfo_status |= IXGBE_SET_FLAG(tx_flags,
IXGBE_TX_FLAGS_IPV4,
IXGBE_ADVTXD_POPTS_IXSM);
+ /* enable IPsec */
+ olinfo_status |= IXGBE_SET_FLAG(tx_flags,
+ IXGBE_TX_FLAGS_IPSEC,
+ IXGBE_ADVTXD_POPTS_IPSEC);
+
/*
* Check Context must be set if Tx switch is enabled, which it
* always is for case where virtual functions are running
@@ -8350,6 +8376,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
u32 tx_flags = 0;
unsigned short f;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
+ struct ixgbe_ipsec_tx_data ipsec_tx = { 0 };
__be16 protocol = skb->protocol;
u8 hdr_len = 0;
@@ -8454,11 +8481,16 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
}
#endif /* IXGBE_FCOE */
+
+#ifdef CONFIG_XFRM_OFFLOAD
+ if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
+ goto out_drop;
+#endif
tso = ixgbe_tso(tx_ring, first, &hdr_len);
if (tso < 0)
goto out_drop;
else if (!tso)
- ixgbe_tx_csum(tx_ring, first);
+ ixgbe_tx_csum(tx_ring, first, &ipsec_tx);
/* add the ATR filter if ATR is on */
if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
@@ -9278,9 +9310,6 @@ free_jump:
static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
struct tc_cls_u32_offload *cls_u32)
{
- if (cls_u32->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_u32->command) {
case TC_CLSU32_NEW_KNODE:
case TC_CLSU32_REPLACE_KNODE:
@@ -9302,7 +9331,7 @@ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct ixgbe_adapter *adapter = cb_priv;
- if (!tc_can_offload(adapter->netdev))
+ if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
@@ -9870,6 +9899,12 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
features &= ~NETIF_F_TSO;
+#ifdef CONFIG_XFRM_OFFLOAD
+ /* IPsec offload doesn't get along well with others *yet* */
+ if (skb->sp)
+ features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM);
+#endif
+
return features;
}
@@ -10459,6 +10494,7 @@ skip_sriov:
NETIF_F_FCOE_MTU;
}
#endif /* IXGBE_FCOE */
+ ixgbe_init_ipsec_offload(adapter);
if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
netdev->hw_features |= NETIF_F_LRO;
@@ -10694,6 +10730,7 @@ static void ixgbe_remove(struct pci_dev *pdev)
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdev(netdev);
+ ixgbe_stop_ipsec_offload(adapter);
ixgbe_clear_interrupt_scheme(adapter);
ixgbe_release_hw_control(adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 21eb79a..ca45359 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2360,11 +2360,6 @@ enum {
#define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
#define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */
-#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000
-#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000
-#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000
/* Multiple Transmit Queue Command Register */
#define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */
#define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */
@@ -2416,6 +2411,9 @@ enum {
#define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */
#define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */
#define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL 0x08000000 /* overlap ERR_PE */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH 0x10000000 /* overlap ERR_OSE */
+#define IXGBE_RXDADV_ERR_IPSEC_AUTH_FAILED 0x18000000
#define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */
#define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */
#define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */
@@ -2437,6 +2435,7 @@ enum {
#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
#define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */
#define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE 1588 Time Stamp */
+#define IXGBE_RXDADV_STAT_SECP 0x00020000 /* IPsec/MACsec pkt found */
/* PSRTYPE bit definitions */
#define IXGBE_PSRTYPE_TCPHDR 0x00000010
@@ -2503,13 +2502,6 @@ enum {
#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */
#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */
-/* Security Processing bit Indication */
-#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000
-#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000
-
/* Masks to determine if packets should be dropped due to frame errors */
#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
IXGBE_RXD_ERR_CE | \
@@ -2523,6 +2515,8 @@ enum {
IXGBE_RXDADV_ERR_LE | \
IXGBE_RXDADV_ERR_PE | \
IXGBE_RXDADV_ERR_OSE | \
+ IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL | \
+ IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH | \
IXGBE_RXDADV_ERR_USE)
/* Multicast bit mask */
@@ -2901,7 +2895,7 @@ union ixgbe_adv_rx_desc {
/* Context descriptors */
struct ixgbe_adv_tx_context_desc {
__le32 vlan_macip_lens;
- __le32 seqnum_seed;
+ __le32 fceof_saidx;
__le32 type_tucmd_mlhl;
__le32 mss_l4len_idx;
};
@@ -2932,6 +2926,7 @@ struct ixgbe_adv_tx_context_desc {
IXGBE_ADVTXD_POPTS_SHIFT)
#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \
IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
#define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */
#define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */
#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
@@ -2947,7 +2942,6 @@ struct ixgbe_adv_tx_context_desc {
#define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */
#define IXGBE_ADVTXD_TUCMD_L4T_RSV 0x00001800 /* RSV L4 Packet TYPE */
#define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /*Req requires Markers and CRC*/
-#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */
#define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 3bce26e..f470d02 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -949,7 +949,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr,
u16 length, bufsz, i, start;
u16 *local_buffer;
- bufsz = sizeof(buf) / sizeof(buf[0]);
+ bufsz = ARRAY_SIZE(buf);
/* Read a chunk at the pointer location */
if (!buffer) {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index ff9d05f..4400e49 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -75,6 +75,9 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
IXGBEVF_STAT("tx_timeout_count", tx_timeout_count),
IXGBEVF_NETDEV_STAT(multicast),
IXGBEVF_STAT("rx_csum_offload_errors", hw_csum_rx_error),
+ IXGBEVF_STAT("alloc_rx_page", alloc_rx_page),
+ IXGBEVF_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
+ IXGBEVF_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
};
#define IXGBEVF_QUEUE_STATS_LEN ( \
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 581f44b..f695242 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -62,7 +62,12 @@ struct ixgbevf_tx_buffer {
struct ixgbevf_rx_buffer {
dma_addr_t dma;
struct page *page;
- unsigned int page_offset;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+ __u32 page_offset;
+#else
+ __u16 page_offset;
+#endif
+ __u16 pagecnt_bias;
};
struct ixgbevf_stats {
@@ -79,6 +84,7 @@ struct ixgbevf_tx_queue_stats {
struct ixgbevf_rx_queue_stats {
u64 alloc_rx_page_failed;
u64 alloc_rx_buff_failed;
+ u64 alloc_rx_page;
u64 csum_err;
};
@@ -260,6 +266,9 @@ static inline void ixgbevf_write_tail(struct ixgbevf_ring *ring, u32 value)
#define MIN_MSIX_Q_VECTORS 1
#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
+#define IXGBEVF_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
/* board specific private data structure */
struct ixgbevf_adapter {
/* this field must be first, see ixgbevf_process_skb_fields */
@@ -287,8 +296,9 @@ struct ixgbevf_adapter {
u64 hw_csum_rx_error;
u64 hw_rx_no_dma_resources;
int num_msix_vectors;
- u32 alloc_rx_page_failed;
- u32 alloc_rx_buff_failed;
+ u64 alloc_rx_page_failed;
+ u64 alloc_rx_buff_failed;
+ u64 alloc_rx_page;
struct msix_entry *msix_entries;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index ed5c3ae..9b3d43d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -206,28 +206,6 @@ static void ixgbevf_set_ivar(struct ixgbevf_adapter *adapter, s8 direction,
}
}
-static void ixgbevf_unmap_and_free_tx_resource(struct ixgbevf_ring *tx_ring,
- struct ixgbevf_tx_buffer *tx_buffer)
-{
- if (tx_buffer->skb) {
- dev_kfree_skb_any(tx_buffer->skb);
- if (dma_unmap_len(tx_buffer, len))
- dma_unmap_single(tx_ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- } else if (dma_unmap_len(tx_buffer, len)) {
- dma_unmap_page(tx_ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- }
- tx_buffer->next_to_watch = NULL;
- tx_buffer->skb = NULL;
- dma_unmap_len_set(tx_buffer, len, 0);
- /* tx_buffer must be completely set up in the transmit path */
-}
-
static u64 ixgbevf_get_tx_completed(struct ixgbevf_ring *ring)
{
return ring->stats.packets;
@@ -349,7 +327,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
DMA_TO_DEVICE);
/* clear tx_buffer data */
- tx_buffer->skb = NULL;
dma_unmap_len_set(tx_buffer, len, 0);
/* unmap remaining buffers */
@@ -595,8 +572,8 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
}
/* map page for use */
- dma = dma_map_page(rx_ring->dev, page, 0,
- PAGE_SIZE, DMA_FROM_DEVICE);
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
@@ -604,13 +581,15 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_page(page);
- rx_ring->rx_stats.alloc_rx_buff_failed++;
+ rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
+ bi->pagecnt_bias = 1;
+ rx_ring->rx_stats.alloc_rx_page++;
return true;
}
@@ -639,6 +618,12 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
if (!ixgbevf_alloc_mapped_page(rx_ring, bi))
break;
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+ bi->page_offset,
+ IXGBEVF_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
/* Refresh the desc even if pkt_addr didn't change
* because each write-back erases this info.
*/
@@ -653,8 +638,8 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
i -= rx_ring->count;
}
- /* clear the hdr_addr for the next_to_use descriptor */
- rx_desc->read.hdr_addr = 0;
+ /* clear the length for the next_to_use descriptor */
+ rx_desc->wb.upper.length = 0;
cleaned_count--;
} while (cleaned_count);
@@ -741,12 +726,7 @@ static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
new_buff->page = old_buff->page;
new_buff->dma = old_buff->dma;
new_buff->page_offset = old_buff->page_offset;
-
- /* sync the buffer for use by the device */
- dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma,
- new_buff->page_offset,
- IXGBEVF_RX_BUFSZ,
- DMA_FROM_DEVICE);
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
}
static inline bool ixgbevf_page_is_reserved(struct page *page)
@@ -754,6 +734,45 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
+ struct page *page,
+ const unsigned int truesize)
+{
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+
+ /* avoid re-using remote pages */
+ if (unlikely(ixgbevf_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_ref_count(page) != pagecnt_bias))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
+
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+ return false;
+
+#endif
+
+ /* If we have drained the page fragment pool we need to update
+ * the pagecnt_bias and page count so that we fully restock the
+ * number of references the driver holds.
+ */
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX);
+ rx_buffer->pagecnt_bias = USHRT_MAX;
+ }
+
+ return true;
+}
+
/**
* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
@@ -771,12 +790,12 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
**/
static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
+ u16 size,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
struct page *page = rx_buffer->page;
unsigned char *va = page_address(page) + rx_buffer->page_offset;
- unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
#if (PAGE_SIZE < 8192)
unsigned int truesize = IXGBEVF_RX_BUFSZ;
#else
@@ -795,7 +814,6 @@ static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
return true;
/* this page cannot be reused so discard it */
- put_page(page);
return false;
}
@@ -815,32 +833,7 @@ add_tail_frag:
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
(unsigned long)va & ~PAGE_MASK, size, truesize);
- /* avoid re-using remote pages */
- if (unlikely(ixgbevf_page_is_reserved(page)))
- return false;
-
-#if (PAGE_SIZE < 8192)
- /* if we are only owner of page we can reuse it */
- if (unlikely(page_count(page) != 1))
- return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
-#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
-
- if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
- return false;
-
-#endif
- /* Even if we own the page, we are not allowed to use atomic_set()
- * This would break get_page_unless_zero() users.
- */
- page_ref_inc(page);
-
- return true;
+ return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
}
static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
@@ -849,11 +842,19 @@ static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
{
struct ixgbevf_rx_buffer *rx_buffer;
struct page *page;
+ u16 size = le16_to_cpu(rx_desc->wb.upper.length);
rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
page = rx_buffer->page;
prefetchw(page);
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
if (likely(!skb)) {
void *page_addr = page_address(page) +
rx_buffer->page_offset;
@@ -879,21 +880,18 @@ static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
prefetchw(skb->data);
}
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- IXGBEVF_RX_BUFSZ,
- DMA_FROM_DEVICE);
-
/* pull page into skb */
- if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
/* hand second half of page back to the ring */
ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
} else {
- /* we are not reusing the buffer so unmap it */
- dma_unmap_page(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE);
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ PAGE_SIZE, DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+ __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
}
/* clear contents of buffer_info */
@@ -930,7 +928,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.length)
break;
/* This memory barrier is needed to keep us from reading
@@ -943,8 +941,10 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
/* exit if we failed to retrieve a buffer */
- if (!skb)
+ if (!skb) {
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
break;
+ }
cleaned_count++;
@@ -1553,6 +1553,10 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter,
txdctl |= (1u << 8) | /* HTHRESH = 1 */
32; /* PTHRESH = 32 */
+ /* reinitialize tx_buffer_info */
+ memset(ring->tx_buffer_info, 0,
+ sizeof(struct ixgbevf_tx_buffer) * ring->count);
+
clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state);
IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), txdctl);
@@ -1721,6 +1725,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
struct ixgbevf_ring *ring)
{
struct ixgbe_hw *hw = &adapter->hw;
+ union ixgbe_adv_rx_desc *rx_desc;
u64 rdba = ring->dma;
u32 rxdctl;
u8 reg_idx = ring->reg_idx;
@@ -1749,6 +1754,14 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0);
ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx);
+ /* initialize rx_buffer_info */
+ memset(ring->rx_buffer_info, 0,
+ sizeof(struct ixgbevf_rx_buffer) * ring->count);
+
+ /* initialize Rx descriptor 0 */
+ rx_desc = IXGBEVF_RX_DESC(ring, 0);
+ rx_desc->wb.upper.length = 0;
+
/* reset ntu and ntc to place SW in sync with hardwdare */
ring->next_to_clean = 0;
ring->next_to_use = 0;
@@ -2103,9 +2116,7 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter)
**/
static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
{
- struct device *dev = rx_ring->dev;
- unsigned long size;
- unsigned int i;
+ u16 i = rx_ring->next_to_clean;
/* Free Rx ring sk_buff */
if (rx_ring->skb) {
@@ -2113,29 +2124,39 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
rx_ring->skb = NULL;
}
- /* ring already cleared, nothing to do */
- if (!rx_ring->rx_buffer_info)
- return;
-
/* Free all the Rx ring pages */
- for (i = 0; i < rx_ring->count; i++) {
+ while (i != rx_ring->next_to_alloc) {
struct ixgbevf_rx_buffer *rx_buffer;
rx_buffer = &rx_ring->rx_buffer_info[i];
- if (rx_buffer->dma)
- dma_unmap_page(dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE);
- rx_buffer->dma = 0;
- if (rx_buffer->page)
- __free_page(rx_buffer->page);
- rx_buffer->page = NULL;
- }
- size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
- memset(rx_ring->rx_buffer_info, 0, size);
+ /* Invalidate cache lines that may have been written to by
+ * device so that we avoid corrupting memory.
+ */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ IXGBEVF_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
+ /* free resources associated with mapping */
+ dma_unmap_page_attrs(rx_ring->dev,
+ rx_buffer->dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
- /* Zero out the descriptor ring */
- memset(rx_ring->desc, 0, rx_ring->size);
+ i++;
+ if (i == rx_ring->count)
+ i = 0;
+ }
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
}
/**
@@ -2144,23 +2165,57 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
**/
static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring)
{
- struct ixgbevf_tx_buffer *tx_buffer_info;
- unsigned long size;
- unsigned int i;
+ u16 i = tx_ring->next_to_clean;
+ struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
- if (!tx_ring->tx_buffer_info)
- return;
+ while (i != tx_ring->next_to_use) {
+ union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
- /* Free all the Tx ring sk_buffs */
- for (i = 0; i < tx_ring->count; i++) {
- tx_buffer_info = &tx_ring->tx_buffer_info[i];
- ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+ /* Free all the Tx ring sk_buffs */
+ dev_kfree_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* check for eop_desc to determine the end of the packet */
+ eop_desc = tx_buffer->next_to_watch;
+ tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ }
}
- size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
- memset(tx_ring->tx_buffer_info, 0, size);
+ /* reset next_to_use and next_to_clean */
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
- memset(tx_ring->desc, 0, tx_ring->size);
}
/**
@@ -2712,6 +2767,8 @@ out:
void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
+ u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+ u64 alloc_rx_page = 0, hw_csum_rx_error = 0;
int i;
if (test_bit(__IXGBEVF_DOWN, &adapter->state) ||
@@ -2732,10 +2789,18 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
adapter->stats.vfmprc);
for (i = 0; i < adapter->num_rx_queues; i++) {
- adapter->hw_csum_rx_error +=
- adapter->rx_ring[i]->hw_csum_rx_error;
- adapter->rx_ring[i]->hw_csum_rx_error = 0;
+ struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+ hw_csum_rx_error += rx_ring->rx_stats.csum_err;
+ alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
+ alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
+ alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
}
+
+ adapter->hw_csum_rx_error = hw_csum_rx_error;
+ adapter->alloc_rx_page_failed = alloc_rx_page_failed;
+ adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
+ adapter->alloc_rx_page = alloc_rx_page;
}
/**
@@ -2980,7 +3045,7 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
int size;
size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
- tx_ring->tx_buffer_info = vzalloc(size);
+ tx_ring->tx_buffer_info = vmalloc(size);
if (!tx_ring->tx_buffer_info)
goto err;
@@ -3040,7 +3105,7 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
int size;
size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
- rx_ring->rx_buffer_info = vzalloc(size);
+ rx_ring->rx_buffer_info = vmalloc(size);
if (!rx_ring->rx_buffer_info)
goto err;
@@ -3482,34 +3547,37 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
struct ixgbevf_tx_buffer *first,
const u8 hdr_len)
{
- dma_addr_t dma;
struct sk_buff *skb = first->skb;
struct ixgbevf_tx_buffer *tx_buffer;
union ixgbe_adv_tx_desc *tx_desc;
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
- unsigned int data_len = skb->data_len;
- unsigned int size = skb_headlen(skb);
- unsigned int paylen = skb->len - hdr_len;
+ struct skb_frag_struct *frag;
+ dma_addr_t dma;
+ unsigned int data_len, size;
u32 tx_flags = first->tx_flags;
- __le32 cmd_type;
+ __le32 cmd_type = ixgbevf_tx_cmd_type(tx_flags);
u16 i = tx_ring->next_to_use;
tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
- ixgbevf_tx_olinfo_status(tx_desc, tx_flags, paylen);
- cmd_type = ixgbevf_tx_cmd_type(tx_flags);
+ ixgbevf_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len);
+
+ size = skb_headlen(skb);
+ data_len = skb->data_len;
dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
- if (dma_mapping_error(tx_ring->dev, dma))
- goto dma_error;
- /* record length, and DMA address */
- dma_unmap_len_set(first, len, size);
- dma_unmap_addr_set(first, dma, dma);
+ tx_buffer = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buffer, len, size);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
- tx_desc->read.buffer_addr = cpu_to_le64(dma);
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
- for (;;) {
while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) {
tx_desc->read.cmd_type_len =
cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD);
@@ -3520,12 +3588,12 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
i = 0;
}
+ tx_desc->read.olinfo_status = 0;
dma += IXGBE_MAX_DATA_PER_TXD;
size -= IXGBE_MAX_DATA_PER_TXD;
tx_desc->read.buffer_addr = cpu_to_le64(dma);
- tx_desc->read.olinfo_status = 0;
}
if (likely(!data_len))
@@ -3539,23 +3607,15 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
i = 0;
}
+ tx_desc->read.olinfo_status = 0;
size = skb_frag_size(frag);
data_len -= size;
dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
DMA_TO_DEVICE);
- if (dma_mapping_error(tx_ring->dev, dma))
- goto dma_error;
tx_buffer = &tx_ring->tx_buffer_info[i];
- dma_unmap_len_set(tx_buffer, len, size);
- dma_unmap_addr_set(tx_buffer, dma, dma);
-
- tx_desc->read.buffer_addr = cpu_to_le64(dma);
- tx_desc->read.olinfo_status = 0;
-
- frag++;
}
/* write last descriptor with RS and EOP bits */
@@ -3589,18 +3649,32 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
return;
dma_error:
dev_err(tx_ring->dev, "TX DMA map failed\n");
+ tx_buffer = &tx_ring->tx_buffer_info[i];
/* clear dma mappings for failed tx_buffer_info map */
- for (;;) {
+ while (tx_buffer != first) {
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ if (i-- == 0)
+ i += tx_ring->count;
tx_buffer = &tx_ring->tx_buffer_info[i];
- ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer);
- if (tx_buffer == first)
- break;
- if (i == 0)
- i = tx_ring->count;
- i--;
}
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ dev_kfree_skb_any(tx_buffer->skb);
+ tx_buffer->skb = NULL;
+
tx_ring->next_to_use = i;
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 64c93e8..38d3a32 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -286,7 +286,7 @@ static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
ether_addr_copy(msg_addr, addr);
ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (!ret_val) {
msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
@@ -456,8 +456,7 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
ether_addr_copy(msg_addr, addr);
ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
-
+ ARRAY_SIZE(msgbuf));
msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
/* if nacked the address was rejected, use "perm_addr" */
@@ -574,7 +573,7 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
msgbuf[1] = xcast_mode;
err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (err)
return err;
@@ -614,7 +613,7 @@ static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (err)
goto mbx_err;
@@ -826,7 +825,7 @@ static s32 ixgbevf_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size)
msgbuf[1] = max_size;
ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (ret_val)
return ret_val;
if ((msgbuf[0] & IXGBE_VF_SET_LPE) &&
@@ -872,8 +871,7 @@ static int ixgbevf_negotiate_api_version_vf(struct ixgbe_hw *hw, int api)
msg[1] = api;
msg[2] = 0;
- err = ixgbevf_write_msg_read_ack(hw, msg, msg,
- sizeof(msg) / sizeof(u32));
+ err = ixgbevf_write_msg_read_ack(hw, msg, msg, ARRAY_SIZE(msg));
if (!err) {
msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
@@ -924,8 +922,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
msg[0] = IXGBE_VF_GET_QUEUE;
msg[1] = msg[2] = msg[3] = msg[4] = 0;
- err = ixgbevf_write_msg_read_ack(hw, msg, msg,
- sizeof(msg) / sizeof(u32));
+ err = ixgbevf_write_msg_read_ack(hw, msg, msg, ARRAY_SIZE(msg));
if (!err) {
msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8530c77..47bab84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2944,9 +2944,6 @@ out:
static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *cls_flower)
{
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return mlx5e_configure_flower(priv, cls_flower);
@@ -2964,7 +2961,7 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_can_offload(priv->netdev))
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 10fa6a1..363d8dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -719,9 +719,6 @@ static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *cls_flower)
{
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return mlx5e_configure_flower(priv, cls_flower);
@@ -739,7 +736,7 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_can_offload(priv->netdev))
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 833cd0a..3dcc58d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1738,9 +1738,6 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_cls_matchall_offload *f,
bool ingress)
{
- if (f->common.chain_index)
- return -EOPNOTSUPP;
-
switch (f->command) {
case TC_CLSMATCHALL_REPLACE:
return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
@@ -1780,7 +1777,8 @@ static int mlxsw_sp_setup_tc_block_cb_matchall(enum tc_setup_type type,
switch (type) {
case TC_SETUP_CLSMATCHALL:
- if (!tc_can_offload(mlxsw_sp_port->dev))
+ if (!tc_cls_can_offload_and_chain0(mlxsw_sp_port->dev,
+ type_data))
return -EOPNOTSUPP;
return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 91170d9..f0b25ba 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1968,11 +1968,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
dipn = htonl(dip);
dev = mlxsw_sp->router->rifs[rif]->dev;
n = neigh_lookup(&arp_tbl, &dipn, dev);
- if (!n) {
- netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
- &dip);
+ if (!n)
return;
- }
netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
neigh_event_send(n, NULL);
@@ -1999,11 +1996,8 @@ static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
dev = mlxsw_sp->router->rifs[rif]->dev;
n = neigh_lookup(&nd_tbl, &dip, dev);
- if (!n) {
- netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
- &dip);
+ if (!n)
return;
- }
netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
neigh_event_send(n, NULL);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index b320685..3220277 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -130,7 +130,7 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
"only offload of BPF classifiers supported");
return -EOPNOTSUPP;
}
- if (!tc_can_offload_extack(nn->dp.netdev, cls_bpf->common.extack))
+ if (!tc_cls_can_offload_and_chain0(nn->dp.netdev, &cls_bpf->common))
return -EOPNOTSUPP;
if (!nfp_net_ebpf_capable(nn)) {
NL_SET_ERR_MSG_MOD(cls_bpf->common.extack,
@@ -142,8 +142,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
"only ETH_P_ALL supported as filter protocol");
return -EOPNOTSUPP;
}
- if (cls_bpf->common.chain_index)
- return -EOPNOTSUPP;
/* Only support TC direct action */
if (!cls_bpf->exts_integrated ||
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 837134a..08c4c6d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -483,8 +483,7 @@ static int
nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
struct tc_cls_flower_offload *flower, bool egress)
{
- if (!eth_proto_is_802_3(flower->common.protocol) ||
- flower->common.chain_index)
+ if (!eth_proto_is_802_3(flower->common.protocol))
return -EOPNOTSUPP;
switch (flower->command) {
@@ -504,7 +503,7 @@ int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data,
{
struct nfp_repr *repr = cb_priv;
- if (!tc_can_offload(repr->netdev))
+ if (!tc_cls_can_offload_and_chain0(repr->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
@@ -521,7 +520,7 @@ static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type,
{
struct nfp_repr *repr = cb_priv;
- if (!tc_can_offload(repr->netdev))
+ if (!tc_cls_can_offload_and_chain0(repr->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index a3f6d51..66c665d 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -795,7 +795,7 @@ struct fe_priv {
*/
union ring_type get_rx, put_rx, last_rx;
struct nv_skb_map *get_rx_ctx, *put_rx_ctx;
- struct nv_skb_map *first_rx_ctx, *last_rx_ctx;
+ struct nv_skb_map *last_rx_ctx;
struct nv_skb_map *rx_skb;
union ring_type rx_ring;
@@ -1835,7 +1835,7 @@ static int nv_alloc_rx(struct net_device *dev)
if (unlikely(np->put_rx.orig++ == np->last_rx.orig))
np->put_rx.orig = np->rx_ring.orig;
if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
- np->put_rx_ctx = np->first_rx_ctx;
+ np->put_rx_ctx = np->rx_skb;
} else {
packet_dropped:
u64_stats_update_begin(&np->swstats_rx_syncp);
@@ -1877,7 +1877,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
if (unlikely(np->put_rx.ex++ == np->last_rx.ex))
np->put_rx.ex = np->rx_ring.ex;
if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
- np->put_rx_ctx = np->first_rx_ctx;
+ np->put_rx_ctx = np->rx_skb;
} else {
packet_dropped:
u64_stats_update_begin(&np->swstats_rx_syncp);
@@ -1910,7 +1910,8 @@ static void nv_init_rx(struct net_device *dev)
np->last_rx.orig = &np->rx_ring.orig[np->rx_ring_size-1];
else
np->last_rx.ex = &np->rx_ring.ex[np->rx_ring_size-1];
- np->get_rx_ctx = np->put_rx_ctx = np->first_rx_ctx = np->rx_skb;
+ np->get_rx_ctx = np->rx_skb;
+ np->put_rx_ctx = np->rx_skb;
np->last_rx_ctx = &np->rx_skb[np->rx_ring_size-1];
for (i = 0; i < np->rx_ring_size; i++) {
@@ -2914,7 +2915,7 @@ next_pkt:
if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
np->get_rx.orig = np->rx_ring.orig;
if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
- np->get_rx_ctx = np->first_rx_ctx;
+ np->get_rx_ctx = np->rx_skb;
rx_work++;
}
@@ -3003,7 +3004,7 @@ next_pkt:
if (unlikely(np->get_rx.ex++ == np->last_rx.ex))
np->get_rx.ex = np->rx_ring.ex;
if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
- np->get_rx_ctx = np->first_rx_ctx;
+ np->get_rx_ctx = np->rx_skb;
rx_work++;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index bdc46f1..5d040b8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -358,10 +358,27 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
kfree(p_rdma_info);
}
+static void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
+
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+ qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+static void qed_rdma_free_reserved_lkey(struct qed_hwfn *p_hwfn)
+{
+ qed_rdma_free_tid(p_hwfn, p_hwfn->p_rdma_info->dev->reserved_lkey);
+}
+
static void qed_rdma_free(struct qed_hwfn *p_hwfn)
{
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n");
+ qed_rdma_free_reserved_lkey(p_hwfn);
qed_rdma_resc_free(p_hwfn);
}
@@ -615,9 +632,6 @@ static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn)
{
struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
- /* The first DPI is reserved for the Kernel */
- __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap);
-
/* Tid 0 will be used as the key for "reserved MR".
* The driver should allocate memory for it so it can be loaded but no
* ramrod should be passed on it.
@@ -797,17 +811,6 @@ static struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
return p_hwfn->p_rdma_info->dev;
}
-static void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
-{
- struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
-
- DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
-
- spin_lock_bh(&p_hwfn->p_rdma_info->lock);
- qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid);
- spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
-}
-
static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
{
struct qed_hwfn *p_hwfn;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
index 5028fb4..4beedb8 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
@@ -114,8 +114,9 @@ struct emac_tpd {
#define TPD_INSTC_SET(tpd, val) BITS_SET((tpd)->word[3], 17, 17, val)
/* High-14bit Buffer Address, So, the 64b-bit address is
* {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L}
+ * Extend TPD_BUFFER_ADDR_H to [31, 18], because we never enable timestamping.
*/
-#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 30, val)
+#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 31, val)
/* Format D. Word offset from the 1st byte of this packet to start to calculate
* the custom checksum.
*/
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 38c924bd..13235ba 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -615,8 +615,11 @@ static int emac_probe(struct platform_device *pdev)
u32 reg;
int ret;
- /* The TPD buffer address is limited to 45 bits. */
- ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(45));
+ /* The TPD buffer address is limited to:
+ * 1. PTP: 45bits. (Driver doesn't support yet.)
+ * 2. NON-PTP: 46bits.
+ */
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(46));
if (ret) {
dev_err(&pdev->dev, "could not set DMA mask\n");
return ret;
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 6d6fb8c..6473cc6 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -789,7 +789,6 @@ static int ofdpa_flow_tbl_add(struct ofdpa_port *ofdpa_port,
ofdpa_flags_nowait(flags),
ofdpa_cmd_flow_tbl_add,
found, NULL, NULL);
- return 0;
}
static int ofdpa_flow_tbl_del(struct ofdpa_port *ofdpa_port,
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 8ae467d..75fbf58 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -322,6 +322,25 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
return 0;
}
+static void efx_ef10_read_licensed_features(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_LICENSING_V3_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_LICENSING_V3_OUT_LEN);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, LICENSING_V3_IN_OP,
+ MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_LICENSING_V3, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc || (outlen < MC_CMD_LICENSING_V3_OUT_LEN))
+ return;
+
+ nic_data->licensed_features = MCDI_QWORD(outbuf,
+ LICENSING_V3_OUT_LICENSED_FEATURES);
+}
+
static int efx_ef10_get_sysclk_freq(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN);
@@ -722,6 +741,8 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc < 0)
goto fail5;
+ efx_ef10_read_licensed_features(efx);
+
/* We can have one VI for each vi_stride-byte region.
* However, until we use TX option descriptors we need two TX queues
* per channel.
@@ -760,14 +781,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc && rc != -EPERM)
goto fail5;
- rc = efx_ptp_probe(efx, NULL);
- /* Failure to probe PTP is not fatal.
- * In the case of EPERM, efx_ptp_probe will print its own message (in
- * efx_ptp_get_attributes()), so we don't need to.
- */
- if (rc && rc != -EPERM)
- netif_warn(efx, drv, efx->net_dev,
- "Failed to probe PTP, rc=%d\n", rc);
+ efx_ptp_defer_probe_with_channel(efx);
#ifdef CONFIG_SFC_SRIOV
if ((efx->pci_dev->physfn) && (!efx->pci_dev->is_physfn)) {
@@ -937,6 +951,11 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx)
/* Link a buffer to each TX queue */
efx_for_each_channel(channel, efx) {
+ /* Extra channels, even those with TXQs (PTP), do not require
+ * PIO resources.
+ */
+ if (!channel->type->want_pio)
+ continue;
efx_for_each_channel_tx_queue(tx_queue, channel) {
/* We assign the PIO buffers to queues in
* reverse order to allow for the following
@@ -1284,7 +1303,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
void __iomem *membase;
int rc;
- channel_vis = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
+ channel_vis = max(efx->n_channels,
+ (efx->n_tx_channels + efx->n_extra_tx_channels) *
+ EFX_TXQ_TYPES);
#ifdef EFX_USE_PIO
/* Try to allocate PIO buffers if wanted and if the full
@@ -2408,12 +2429,25 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
int i;
BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+ /* Only attempt to enable TX timestamping if we have the license for it,
+ * otherwise TXQ init will fail
+ */
+ if (!(nic_data->licensed_features &
+ (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN))) {
+ tx_queue->timestamping = false;
+ /* Disable sync events on this channel. */
+ if (efx->type->ptp_set_ts_sync_events)
+ efx->type->ptp_set_ts_sync_events(efx, false, false);
+ }
+
/* TSOv2 is a limited resource that can only be configured on a limited
* number of queues. TSO without checksum offload is not really a thing,
* so we only enable it for those queues.
+ * TSOv2 cannot be used with Hardware timestamping.
*/
if (csum_offload && (nic_data->datapath_caps2 &
- (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) {
+ (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
+ !tx_queue->timestamping) {
tso_v2 = true;
netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
channel->channel);
@@ -2439,14 +2473,16 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
do {
- MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS,
+ MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
/* This flag was removed from mcdi_pcol.h for
* the non-_EXT version of INIT_TXQ. However,
* firmware still honours it.
*/
INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
- INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
+ INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
+ tx_queue->timestamping);
rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
NULL, 0, NULL);
@@ -2472,12 +2508,13 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION;
tx_queue->insert_count = 1;
txd = efx_tx_desc(tx_queue, 0);
- EFX_POPULATE_QWORD_4(*txd,
+ EFX_POPULATE_QWORD_5(*txd,
ESF_DZ_TX_DESC_IS_OPT, true,
ESF_DZ_TX_OPTION_TYPE,
ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
- ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
+ ESF_DZ_TX_OPTION_IP_CSUM, csum_offload,
+ ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping);
tx_queue->write_count = 1;
if (tso_v2) {
@@ -3572,31 +3609,92 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel,
return n_packets;
}
-static int
+static u32 efx_ef10_extract_event_ts(efx_qword_t *event)
+{
+ u32 tstamp;
+
+ tstamp = EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_HI);
+ tstamp <<= 16;
+ tstamp |= EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_LO);
+
+ return tstamp;
+}
+
+static void
efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
{
struct efx_nic *efx = channel->efx;
struct efx_tx_queue *tx_queue;
unsigned int tx_ev_desc_ptr;
unsigned int tx_ev_q_label;
- int tx_descs = 0;
+ unsigned int tx_ev_type;
+ u64 ts_part;
if (unlikely(READ_ONCE(efx->reset_pending)))
- return 0;
+ return;
if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT)))
- return 0;
+ return;
- /* Transmit completion */
- tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX);
+ /* Get the transmit queue */
tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
tx_queue = efx_channel_get_tx_queue(channel,
tx_ev_q_label % EFX_TXQ_TYPES);
- tx_descs = ((tx_ev_desc_ptr + 1 - tx_queue->read_count) &
- tx_queue->ptr_mask);
- efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask);
- return tx_descs;
+ if (!tx_queue->timestamping) {
+ /* Transmit completion */
+ tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX);
+ efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask);
+ return;
+ }
+
+ /* Transmit timestamps are only available for 8XXX series. They result
+ * in three events per packet. These occur in order, and are:
+ * - the normal completion event
+ * - the low part of the timestamp
+ * - the high part of the timestamp
+ *
+ * Each part of the timestamp is itself split across two 16 bit
+ * fields in the event.
+ */
+ tx_ev_type = EFX_QWORD_FIELD(*event, ESF_EZ_TX_SOFT1);
+
+ switch (tx_ev_type) {
+ case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION:
+ /* In case of Queue flush or FLR, we might have received
+ * the previous TX completion event but not the Timestamp
+ * events.
+ */
+ if (tx_queue->completed_desc_ptr != tx_queue->ptr_mask)
+ efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
+
+ tx_ev_desc_ptr = EFX_QWORD_FIELD(*event,
+ ESF_DZ_TX_DESCR_INDX);
+ tx_queue->completed_desc_ptr =
+ tx_ev_desc_ptr & tx_queue->ptr_mask;
+ break;
+
+ case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO:
+ ts_part = efx_ef10_extract_event_ts(event);
+ tx_queue->completed_timestamp_minor = ts_part;
+ break;
+
+ case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI:
+ ts_part = efx_ef10_extract_event_ts(event);
+ tx_queue->completed_timestamp_major = ts_part;
+
+ efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
+ tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ break;
+
+ default:
+ netif_err(efx, hw, efx->net_dev,
+ "channel %d unknown tx event type %d (data "
+ EFX_QWORD_FMT ")\n",
+ channel->channel, tx_ev_type,
+ EFX_QWORD_VAL(*event));
+ break;
+ }
}
static void
@@ -3658,7 +3756,6 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota)
efx_qword_t event, *p_event;
unsigned int read_ptr;
int ev_code;
- int tx_descs = 0;
int spent = 0;
if (quota <= 0)
@@ -3698,13 +3795,7 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota)
}
break;
case ESE_DZ_EV_CODE_TX_EV:
- tx_descs += efx_ef10_handle_tx_event(channel, &event);
- if (tx_descs > efx->txq_entries) {
- spent = quota;
- goto out;
- } else if (++spent == quota) {
- goto out;
- }
+ efx_ef10_handle_tx_event(channel, &event);
break;
case ESE_DZ_EV_CODE_DRIVER_EV:
efx_ef10_handle_driver_event(channel, &event);
@@ -6179,7 +6270,8 @@ static int efx_ef10_ptp_set_ts_sync_events(struct efx_nic *efx, bool en,
efx_ef10_rx_enable_timestamping :
efx_ef10_rx_disable_timestamping;
- efx_for_each_channel(channel, efx) {
+ channel = efx_ptp_channel(efx);
+ if (channel) {
int rc = set(channel, temp);
if (en && rc != 0) {
efx_ef10_ptp_set_ts_sync_events(efx, false, temp);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 12f0abc..456866b 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -896,12 +896,20 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
}
+bool efx_default_channel_want_txqs(struct efx_channel *channel)
+{
+ return channel->channel - channel->efx->tx_channel_offset <
+ channel->efx->n_tx_channels;
+}
+
static const struct efx_channel_type efx_default_channel_type = {
.pre_probe = efx_channel_dummy_op_int,
.post_remove = efx_channel_dummy_op_void,
.get_name = efx_get_channel_name,
.copy = efx_copy_channel,
+ .want_txqs = efx_default_channel_want_txqs,
.keep_eventq = false,
+ .want_pio = true,
};
int efx_channel_dummy_op_int(struct efx_channel *channel)
@@ -1501,6 +1509,7 @@ static int efx_probe_interrupts(struct efx_nic *efx)
}
/* Assign extra channels if possible */
+ efx->n_extra_tx_channels = 0;
j = efx->n_channels;
for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
if (!efx->extra_channel_type[i])
@@ -1512,6 +1521,8 @@ static int efx_probe_interrupts(struct efx_nic *efx)
--j;
efx_get_channel(efx, j)->type =
efx->extra_channel_type[i];
+ if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
+ efx->n_extra_tx_channels++;
}
}
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 5334dc8..266b9be 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -818,17 +818,16 @@ static void efx_farch_magic_event(struct efx_channel *channel, u32 magic)
* The NIC batches TX completion events; the message we receive is of
* the form "complete all TX events up to this index".
*/
-static int
+static void
efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
{
unsigned int tx_ev_desc_ptr;
unsigned int tx_ev_q_label;
struct efx_tx_queue *tx_queue;
struct efx_nic *efx = channel->efx;
- int tx_packets = 0;
if (unlikely(READ_ONCE(efx->reset_pending)))
- return 0;
+ return;
if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
/* Transmit completion */
@@ -836,8 +835,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
tx_queue = efx_channel_get_tx_queue(
channel, tx_ev_q_label % EFX_TXQ_TYPES);
- tx_packets = ((tx_ev_desc_ptr - tx_queue->read_count) &
- tx_queue->ptr_mask);
efx_xmit_done(tx_queue, tx_ev_desc_ptr);
} else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
/* Rewrite the FIFO write pointer */
@@ -856,8 +853,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
EFX_QWORD_FMT"\n", channel->channel,
EFX_QWORD_VAL(*event));
}
-
- return tx_packets;
}
/* Detect errors included in the rx_evt_pkt_ok bit. */
@@ -1090,7 +1085,7 @@ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
int qid;
qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
- if (qid < EFX_TXQ_TYPES * efx->n_tx_channels) {
+ if (qid < EFX_TXQ_TYPES * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
tx_queue = efx_get_tx_queue(efx, qid / EFX_TXQ_TYPES,
qid % EFX_TXQ_TYPES);
if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) {
@@ -1270,7 +1265,6 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget)
unsigned int read_ptr;
efx_qword_t event, *p_event;
int ev_code;
- int tx_packets = 0;
int spent = 0;
if (budget <= 0)
@@ -1304,12 +1298,7 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget)
goto out;
break;
case FSE_AZ_EV_CODE_TX_EV:
- tx_packets += efx_farch_handle_tx_event(channel,
- &event);
- if (tx_packets > efx->txq_entries) {
- spent = budget;
- goto out;
- }
+ efx_farch_handle_tx_event(channel, &event);
break;
case FSE_AZ_EV_CODE_DRV_GEN_EV:
efx_farch_handle_generated_event(channel, &event);
@@ -1680,20 +1669,21 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
*/
void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
{
- unsigned vi_count, buftbl_min;
+ unsigned vi_count, buftbl_min, total_tx_channels;
#ifdef CONFIG_SFC_SRIOV
struct siena_nic_data *nic_data = efx->nic_data;
#endif
+ total_tx_channels = efx->n_tx_channels + efx->n_extra_tx_channels;
/* Account for the buffer table entries backing the datapath channels
* and the descriptor caches for those channels.
*/
buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE +
- efx->n_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE +
+ total_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE +
efx->n_channels * EFX_MAX_EVQ_SIZE)
* sizeof(efx_qword_t) / EFX_BUF_SIZE);
- vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
+ vi_count = max(efx->n_channels, total_tx_channels * EFX_TXQ_TYPES);
#ifdef CONFIG_SFC_SRIOV
if (efx->type->sriov_wanted) {
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 3dd42f3..d20a866 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -191,6 +191,7 @@ struct efx_tx_buffer {
* Size of the region is efx_piobuf_size.
* @piobuf_offset: Buffer offset to be specified in PIO descriptors
* @initialised: Has hardware queue been initialised?
+ * @timestamping: Is timestamping enabled for this channel?
* @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and
* may also map tx data, depending on the nature of the TSO implementation.
* @read_count: Current read pointer.
@@ -202,6 +203,10 @@ struct efx_tx_buffer {
* avoid cache-line ping-pong between the xmit path and the
* completion path.
* @merge_events: Number of TX merged completion events
+ * @completed_desc_ptr: Most recent completed pointer - only used with
+ * timestamping.
+ * @completed_timestamp_major: Top part of the most recent tx timestamp.
+ * @completed_timestamp_minor: Low part of the most recent tx timestamp.
* @insert_count: Current insert pointer
* This is the number of buffers that have been added to the
* software ring.
@@ -247,6 +252,7 @@ struct efx_tx_queue {
void __iomem *piobuf;
unsigned int piobuf_offset;
bool initialised;
+ bool timestamping;
/* Function pointers used in the fast path. */
int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
@@ -257,6 +263,9 @@ struct efx_tx_queue {
unsigned int merge_events;
unsigned int bytes_compl;
unsigned int pkts_compl;
+ unsigned int completed_desc_ptr;
+ u32 completed_timestamp_major;
+ u32 completed_timestamp_minor;
/* Members used only on the xmit path */
unsigned int insert_count ____cacheline_aligned_in_smp;
@@ -522,8 +531,12 @@ struct efx_msi_context {
* @copy: Copy the channel state prior to reallocation. May be %NULL if
* reallocation is not supported.
* @receive_skb: Handle an skb ready to be passed to netif_receive_skb()
+ * @want_txqs: Determine whether this channel should have TX queues
+ * created. If %NULL, TX queues are not created.
* @keep_eventq: Flag for whether event queue should be kept initialised
* while the device is stopped
+ * @want_pio: Flag for whether PIO buffers should be linked to this
+ * channel's TX queues.
*/
struct efx_channel_type {
void (*handle_no_channel)(struct efx_nic *);
@@ -532,7 +545,9 @@ struct efx_channel_type {
void (*get_name)(struct efx_channel *, char *buf, size_t len);
struct efx_channel *(*copy)(const struct efx_channel *);
bool (*receive_skb)(struct efx_channel *, struct sk_buff *);
+ bool (*want_txqs)(struct efx_channel *);
bool keep_eventq;
+ bool want_pio;
};
enum efx_led_mode {
@@ -735,6 +750,7 @@ struct vfdi_status;
* @n_channels: Number of channels in use
* @n_rx_channels: Number of channels used for RX (= number of RX queues)
* @n_tx_channels: Number of channels used for TX
+ * @n_extra_tx_channels: Number of extra channels with TX queues
* @rx_ip_align: RX DMA address offset to have IP header aligned in
* in accordance with NET_IP_ALIGN
* @rx_dma_len: Current maximum RX DMA length
@@ -881,6 +897,7 @@ struct efx_nic {
unsigned rss_spread;
unsigned tx_channel_offset;
unsigned n_tx_channels;
+ unsigned n_extra_tx_channels;
unsigned int rx_ip_align;
unsigned int rx_dma_len;
unsigned int rx_buffer_order;
@@ -1363,8 +1380,8 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type)
static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
{
- return channel->channel - channel->efx->tx_channel_offset <
- channel->efx->n_tx_channels;
+ return channel->type && channel->type->want_txqs &&
+ channel->type->want_txqs(channel);
}
static inline struct efx_tx_queue *
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 7630522..6549fc6 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -440,6 +440,7 @@ struct efx_ef10_nic_data {
struct efx_udp_tunnel udp_tunnels[16];
bool udp_tunnels_dirty;
struct mutex udp_tunnels_lock;
+ u64 licensed_features;
};
int efx_init_sriov(void);
@@ -448,6 +449,7 @@ void efx_fini_sriov(void);
struct ethtool_ts_info;
int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
+struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
void efx_ptp_remove(struct efx_nic *efx);
int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
@@ -471,6 +473,8 @@ static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel,
}
void efx_ptp_start_datapath(struct efx_nic *efx);
void efx_ptp_stop_datapath(struct efx_nic *efx);
+bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx);
+ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue);
extern const struct efx_nic_type falcon_a1_nic_type;
extern const struct efx_nic_type falcon_b0_nic_type;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 3b37d7d..f1cc2ed 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -149,18 +149,14 @@ enum ptp_packet_state {
/* Maximum parts-per-billion adjustment that is acceptable */
#define MAX_PPB 1000000
-/* Number of bits required to hold the above */
-#define MAX_PPB_BITS 20
-
-/* Number of extra bits allowed when calculating fractional ns.
- * EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS + MAX_PPB_BITS should
- * be less than 63.
- */
-#define PPB_EXTRA_BITS 2
-
/* Precalculate scale word to avoid long long division at runtime */
-#define PPB_SCALE_WORD ((1LL << (PPB_EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS +\
- MAX_PPB_BITS)) / 1000000000LL)
+/* This is equivalent to 2^66 / 10^9. */
+#define PPB_SCALE_WORD ((1LL << (57)) / 1953125LL)
+
+/* How much to shift down after scaling to convert to FP40 */
+#define PPB_SHIFT_FP40 26
+/* ... and FP44. */
+#define PPB_SHIFT_FP44 22
#define PTP_SYNC_ATTEMPTS 4
@@ -218,8 +214,8 @@ struct efx_ptp_timeset {
* @channel: The PTP channel (Siena only)
* @rx_ts_inline: Flag for whether RX timestamps are inline (else they are
* separate events)
- * @rxq: Receive queue (awaiting timestamps)
- * @txq: Transmit queue
+ * @rxq: Receive SKB queue (awaiting timestamps)
+ * @txq: Transmit SKB queue
* @evt_list: List of MC receive events awaiting packets
* @evt_free_list: List of free events
* @evt_lock: Lock for manipulating evt_list and evt_free_list
@@ -233,19 +229,36 @@ struct efx_ptp_timeset {
* @config: Current timestamp configuration
* @enabled: PTP operation enabled
* @mode: Mode in which PTP operating (PTP version)
- * @time_format: Time format supported by this NIC
* @ns_to_nic_time: Function to convert from scalar nanoseconds to NIC time
* @nic_to_kernel_time: Function to convert from NIC to kernel time
+ * @nic_time.minor_max: Wrap point for NIC minor times
+ * @nic_time.sync_event_diff_min: Minimum acceptable difference between time
+ * in packet prefix and last MCDI time sync event i.e. how much earlier than
+ * the last sync event time a packet timestamp can be.
+ * @nic_time.sync_event_diff_max: Maximum acceptable difference between time
+ * in packet prefix and last MCDI time sync event i.e. how much later than
+ * the last sync event time a packet timestamp can be.
+ * @nic_time.sync_event_minor_shift: Shift required to make minor time from
+ * field in MCDI time sync event.
* @min_synchronisation_ns: Minimum acceptable corrected sync window
- * @ts_corrections.tx: Required driver correction of transmit timestamps
- * @ts_corrections.rx: Required driver correction of receive timestamps
+ * @capabilities: Capabilities flags from the NIC
+ * @ts_corrections.ptp_tx: Required driver correction of PTP packet transmit
+ * timestamps
+ * @ts_corrections.ptp_rx: Required driver correction of PTP packet receive
+ * timestamps
* @ts_corrections.pps_out: PPS output error (information only)
* @ts_corrections.pps_in: Required driver correction of PPS input timestamps
+ * @ts_corrections.general_tx: Required driver correction of general packet
+ * transmit timestamps
+ * @ts_corrections.general_rx: Required driver correction of general packet
+ * receive timestamps
* @evt_frags: Partly assembled PTP events
* @evt_frag_idx: Current fragment number
* @evt_code: Last event code
* @start: Address at which MC indicates ready for synchronisation
* @host_time_pps: Host time at last PPS
+ * @adjfreq_ppb_shift: Shift required to convert scaled parts-per-billion
+ * frequency adjustment into a fixed point fractional nanosecond format.
* @current_adjfreq: Current ppb adjustment.
* @phc_clock: Pointer to registered phc device (if primary function)
* @phc_clock_info: Registration structure for phc device
@@ -264,6 +277,7 @@ struct efx_ptp_timeset {
* @oversize_sync_windows: Number of corrected sync windows that are too large
* @rx_no_timestamp: Number of packets received without a timestamp.
* @timeset: Last set of synchronisation statistics.
+ * @xmit_skb: Transmit SKB function.
*/
struct efx_ptp_data {
struct efx_nic *efx;
@@ -284,22 +298,31 @@ struct efx_ptp_data {
struct hwtstamp_config config;
bool enabled;
unsigned int mode;
- unsigned int time_format;
void (*ns_to_nic_time)(s64 ns, u32 *nic_major, u32 *nic_minor);
ktime_t (*nic_to_kernel_time)(u32 nic_major, u32 nic_minor,
s32 correction);
+ struct {
+ u32 minor_max;
+ u32 sync_event_diff_min;
+ u32 sync_event_diff_max;
+ unsigned int sync_event_minor_shift;
+ } nic_time;
unsigned int min_synchronisation_ns;
+ unsigned int capabilities;
struct {
- s32 tx;
- s32 rx;
+ s32 ptp_tx;
+ s32 ptp_rx;
s32 pps_out;
s32 pps_in;
+ s32 general_tx;
+ s32 general_rx;
} ts_corrections;
efx_qword_t evt_frags[MAX_EVENT_FRAGS];
int evt_frag_idx;
int evt_code;
struct efx_buffer start;
struct pps_event_time host_time_pps;
+ unsigned int adjfreq_ppb_shift;
s64 current_adjfreq;
struct ptp_clock *phc_clock;
struct ptp_clock_info phc_clock_info;
@@ -319,6 +342,7 @@ struct efx_ptp_data {
unsigned int rx_no_timestamp;
struct efx_ptp_timeset
timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM];
+ void (*xmit_skb)(struct efx_nic *efx, struct sk_buff *skb);
};
static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta);
@@ -329,6 +353,24 @@ static int efx_phc_settime(struct ptp_clock_info *ptp,
static int efx_phc_enable(struct ptp_clock_info *ptp,
struct ptp_clock_request *request, int on);
+bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+ return ((efx_nic_rev(efx) >= EFX_REV_HUNT_A0) &&
+ (nic_data->datapath_caps2 &
+ (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN)
+ ));
+}
+
+/* PTP 'extra' channel is still a traffic channel, but we only create TX queues
+ * if PTP uses MAC TX timestamps, not if PTP uses the MC directly to transmit.
+ */
+bool efx_ptp_want_txqs(struct efx_channel *channel)
+{
+ return efx_ptp_use_mac_tx_timestamps(channel->efx);
+}
+
#define PTP_SW_STAT(ext_name, field_name) \
{ #ext_name, 0, offsetof(struct efx_ptp_data, field_name) }
#define PTP_MC_STAT(ext_name, mcdi_name) \
@@ -471,6 +513,89 @@ static ktime_t efx_ptp_s27_to_ktime_correction(u32 nic_major, u32 nic_minor,
return efx_ptp_s27_to_ktime(nic_major, nic_minor);
}
+/* For Medford2 platforms the time is in seconds and quarter nanoseconds. */
+static void efx_ptp_ns_to_s_qns(s64 ns, u32 *nic_major, u32 *nic_minor)
+{
+ struct timespec64 ts = ns_to_timespec64(ns);
+
+ *nic_major = (u32)ts.tv_sec;
+ *nic_minor = ts.tv_nsec * 4;
+}
+
+static ktime_t efx_ptp_s_qns_to_ktime_correction(u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ ktime_t kt;
+
+ nic_minor = DIV_ROUND_CLOSEST(nic_minor, 4);
+ correction = DIV_ROUND_CLOSEST(correction, 4);
+
+ kt = ktime_set(nic_major, nic_minor);
+
+ if (correction >= 0)
+ kt = ktime_add_ns(kt, (u64)correction);
+ else
+ kt = ktime_sub_ns(kt, (u64)-correction);
+ return kt;
+}
+
+struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
+{
+ return efx->ptp_data ? efx->ptp_data->channel : NULL;
+}
+
+static u32 last_sync_timestamp_major(struct efx_nic *efx)
+{
+ struct efx_channel *channel = efx_ptp_channel(efx);
+ u32 major = 0;
+
+ if (channel)
+ major = channel->sync_timestamp_major;
+ return major;
+}
+
+/* The 8000 series and later can provide the time from the MAC, which is only
+ * 48 bits long and provides meta-information in the top 2 bits.
+ */
+static ktime_t
+efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx,
+ struct efx_ptp_data *ptp,
+ u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ ktime_t kt = { 0 };
+
+ if (!(nic_major & 0x80000000)) {
+ WARN_ON_ONCE(nic_major >> 16);
+ /* Use the top bits from the latest sync event. */
+ nic_major &= 0xffff;
+ nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000);
+
+ kt = ptp->nic_to_kernel_time(nic_major, nic_minor,
+ correction);
+ }
+ return kt;
+}
+
+ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ ktime_t kt;
+
+ if (efx_ptp_use_mac_tx_timestamps(efx))
+ kt = efx_ptp_mac_nic_to_ktime_correction(efx, ptp,
+ tx_queue->completed_timestamp_major,
+ tx_queue->completed_timestamp_minor,
+ ptp->ts_corrections.general_tx);
+ else
+ kt = ptp->nic_to_kernel_time(
+ tx_queue->completed_timestamp_major,
+ tx_queue->completed_timestamp_minor,
+ ptp->ts_corrections.general_tx);
+ return kt;
+}
+
/* Get PTP attributes and set up time conversions */
static int efx_ptp_get_attributes(struct efx_nic *efx)
{
@@ -502,31 +627,71 @@ static int efx_ptp_get_attributes(struct efx_nic *efx)
return rc;
}
- if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION) {
+ switch (fmt) {
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION:
ptp->ns_to_nic_time = efx_ptp_ns_to_s27;
ptp->nic_to_kernel_time = efx_ptp_s27_to_ktime_correction;
- } else if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS) {
+ ptp->nic_time.minor_max = 1 << 27;
+ ptp->nic_time.sync_event_minor_shift = 19;
+ break;
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS:
ptp->ns_to_nic_time = efx_ptp_ns_to_s_ns;
ptp->nic_to_kernel_time = efx_ptp_s_ns_to_ktime_correction;
- } else {
+ ptp->nic_time.minor_max = 1000000000;
+ ptp->nic_time.sync_event_minor_shift = 22;
+ break;
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_QTR_NANOSECONDS:
+ ptp->ns_to_nic_time = efx_ptp_ns_to_s_qns;
+ ptp->nic_to_kernel_time = efx_ptp_s_qns_to_ktime_correction;
+ ptp->nic_time.minor_max = 4000000000UL;
+ ptp->nic_time.sync_event_minor_shift = 24;
+ break;
+ default:
return -ERANGE;
}
- ptp->time_format = fmt;
-
- /* MC_CMD_PTP_OP_GET_ATTRIBUTES is an extended version of an older
- * operation MC_CMD_PTP_OP_GET_TIME_FORMAT that also returns a value
- * to use for the minimum acceptable corrected synchronization window.
+ /* Precalculate acceptable difference between the minor time in the
+ * packet prefix and the last MCDI time sync event. We expect the
+ * packet prefix timestamp to be after of sync event by up to one
+ * sync event interval (0.25s) but we allow it to exceed this by a
+ * fuzz factor of (0.1s)
+ */
+ ptp->nic_time.sync_event_diff_min = ptp->nic_time.minor_max
+ - (ptp->nic_time.minor_max / 10);
+ ptp->nic_time.sync_event_diff_max = (ptp->nic_time.minor_max / 4)
+ + (ptp->nic_time.minor_max / 10);
+
+ /* MC_CMD_PTP_OP_GET_ATTRIBUTES has been extended twice from an older
+ * operation MC_CMD_PTP_OP_GET_TIME_FORMAT. The function now may return
+ * a value to use for the minimum acceptable corrected synchronization
+ * window and may return further capabilities.
* If we have the extra information store it. For older firmware that
* does not implement the extended command use the default value.
*/
- if (rc == 0 && out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN)
+ if (rc == 0 &&
+ out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST)
ptp->min_synchronisation_ns =
MCDI_DWORD(outbuf,
PTP_OUT_GET_ATTRIBUTES_SYNC_WINDOW_MIN);
else
ptp->min_synchronisation_ns = DEFAULT_MIN_SYNCHRONISATION_NS;
+ if (rc == 0 &&
+ out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN)
+ ptp->capabilities = MCDI_DWORD(outbuf,
+ PTP_OUT_GET_ATTRIBUTES_CAPABILITIES);
+ else
+ ptp->capabilities = 0;
+
+ /* Set up the shift for conversion between frequency
+ * adjustments in parts-per-billion and the fixed-point
+ * fractional ns format that the adapter uses.
+ */
+ if (ptp->capabilities & (1 << MC_CMD_PTP_OUT_GET_ATTRIBUTES_FP44_FREQ_ADJ_LBN))
+ ptp->adjfreq_ppb_shift = PPB_SHIFT_FP44;
+ else
+ ptp->adjfreq_ppb_shift = PPB_SHIFT_FP40;
+
return 0;
}
@@ -534,8 +699,9 @@ static int efx_ptp_get_attributes(struct efx_nic *efx)
static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_TIMESTAMP_CORRECTIONS_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN);
int rc;
+ size_t out_len;
/* Get the timestamp corrections from the NIC. If this operation is
* not supported (older NICs) then no correction is required.
@@ -545,21 +711,37 @@ static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx)
MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), NULL);
+ outbuf, sizeof(outbuf), &out_len);
if (rc == 0) {
- efx->ptp_data->ts_corrections.tx = MCDI_DWORD(outbuf,
+ efx->ptp_data->ts_corrections.ptp_tx = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_TRANSMIT);
- efx->ptp_data->ts_corrections.rx = MCDI_DWORD(outbuf,
+ efx->ptp_data->ts_corrections.ptp_rx = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_RECEIVE);
efx->ptp_data->ts_corrections.pps_out = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_OUT);
efx->ptp_data->ts_corrections.pps_in = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_IN);
+
+ if (out_len >= MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN) {
+ efx->ptp_data->ts_corrections.general_tx = MCDI_DWORD(
+ outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_TX);
+ efx->ptp_data->ts_corrections.general_rx = MCDI_DWORD(
+ outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_RX);
+ } else {
+ efx->ptp_data->ts_corrections.general_tx =
+ efx->ptp_data->ts_corrections.ptp_tx;
+ efx->ptp_data->ts_corrections.general_rx =
+ efx->ptp_data->ts_corrections.ptp_rx;
+ }
} else if (rc == -EINVAL) {
- efx->ptp_data->ts_corrections.tx = 0;
- efx->ptp_data->ts_corrections.rx = 0;
+ efx->ptp_data->ts_corrections.ptp_tx = 0;
+ efx->ptp_data->ts_corrections.ptp_rx = 0;
efx->ptp_data->ts_corrections.pps_out = 0;
efx->ptp_data->ts_corrections.pps_in = 0;
+ efx->ptp_data->ts_corrections.general_tx = 0;
+ efx->ptp_data->ts_corrections.general_rx = 0;
} else {
efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), outbuf,
sizeof(outbuf), rc);
@@ -873,8 +1055,24 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings)
return rc;
}
+/* Transmit a PTP packet via the dedicated hardware timestamped queue. */
+static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+{
+ struct efx_ptp_data *ptp_data = efx->ptp_data;
+ struct efx_tx_queue *tx_queue;
+ u8 type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
+
+ tx_queue = &ptp_data->channel->tx_queue[type];
+ if (tx_queue && tx_queue->timestamping) {
+ efx_enqueue_skb(tx_queue, skb);
+ } else {
+ WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+ dev_kfree_skb_any(skb);
+ }
+}
+
/* Transmit a PTP packet, via the MCDI interface, to the wire. */
-static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb)
+static void efx_ptp_xmit_skb_mc(struct efx_nic *efx, struct sk_buff *skb)
{
struct efx_ptp_data *ptp_data = efx->ptp_data;
struct skb_shared_hwtstamps timestamps;
@@ -910,16 +1108,16 @@ static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb)
timestamps.hwtstamp = ptp_data->nic_to_kernel_time(
MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MAJOR),
MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MINOR),
- ptp_data->ts_corrections.tx);
+ ptp_data->ts_corrections.ptp_tx);
skb_tstamp_tx(skb, &timestamps);
rc = 0;
fail:
- dev_kfree_skb(skb);
+ dev_kfree_skb_any(skb);
- return rc;
+ return;
}
static void efx_ptp_drop_time_expired_events(struct efx_nic *efx)
@@ -1189,7 +1387,7 @@ static void efx_ptp_worker(struct work_struct *work)
efx_ptp_process_events(efx, &tempq);
while ((skb = skb_dequeue(&ptp_data->txq)))
- efx_ptp_xmit_skb(efx, skb);
+ ptp_data->xmit_skb(efx, skb);
while ((skb = __skb_dequeue(&tempq)))
efx_ptp_process_rx(efx, skb);
@@ -1239,6 +1437,14 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
goto fail2;
}
+ if (efx_ptp_use_mac_tx_timestamps(efx)) {
+ ptp->xmit_skb = efx_ptp_xmit_skb_queue;
+ /* Request sync events on this channel. */
+ channel->sync_events_state = SYNC_EVENTS_QUIESCENT;
+ } else {
+ ptp->xmit_skb = efx_ptp_xmit_skb_mc;
+ }
+
INIT_WORK(&ptp->work, efx_ptp_worker);
ptp->config.flags = 0;
ptp->config.tx_type = HWTSTAMP_TX_OFF;
@@ -1303,11 +1509,21 @@ fail1:
static int efx_ptp_probe_channel(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
+ int rc;
channel->irq_moderation_us = 0;
channel->rx_queue.core_index = 0;
- return efx_ptp_probe(efx, channel);
+ rc = efx_ptp_probe(efx, channel);
+ /* Failure to probe PTP is not fatal; this channel will just not be
+ * used for anything.
+ * In the case of EPERM, efx_ptp_probe will print its own message (in
+ * efx_ptp_get_attributes()), so we don't need to.
+ */
+ if (rc && rc != -EPERM)
+ netif_warn(efx, drv, efx->net_dev,
+ "Failed to probe PTP, rc=%d\n", rc);
+ return 0;
}
void efx_ptp_remove(struct efx_nic *efx)
@@ -1332,6 +1548,7 @@ void efx_ptp_remove(struct efx_nic *efx)
efx_nic_free_buffer(efx, &efx->ptp_data->start);
kfree(efx->ptp_data);
+ efx->ptp_data = NULL;
}
static void efx_ptp_remove_channel(struct efx_channel *channel)
@@ -1548,6 +1765,17 @@ void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info)
ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE |
SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_RAW_HARDWARE);
+ /* Check licensed features. If we don't have the license for TX
+ * timestamps, the NIC will not support them.
+ */
+ if (efx_ptp_use_mac_tx_timestamps(efx)) {
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+ if (!(nic_data->licensed_features &
+ (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN)))
+ ts_info->so_timestamping &=
+ ~SOF_TIMESTAMPING_TX_HARDWARE;
+ }
if (primary && primary->ptp_data && primary->ptp_data->phc_clock)
ts_info->phc_index =
ptp_clock_index(primary->ptp_data->phc_clock);
@@ -1627,7 +1855,7 @@ static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp)
evt->hwtimestamp = efx->ptp_data->nic_to_kernel_time(
EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA),
EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA),
- ptp->ts_corrections.rx);
+ ptp->ts_corrections.ptp_rx);
evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS);
list_add_tail(&evt->link, &ptp->evt_list);
@@ -1709,9 +1937,20 @@ void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev)
void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev)
{
+ struct efx_nic *efx = channel->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+
+ /* When extracting the sync timestamp minor value, we should discard
+ * the least significant two bits. These are not required in order
+ * to reconstruct full-range timestamps and they are optionally used
+ * to report status depending on the options supplied when subscribing
+ * for sync events.
+ */
channel->sync_timestamp_major = MCDI_EVENT_FIELD(*ev, PTP_TIME_MAJOR);
channel->sync_timestamp_minor =
- MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_26_19) << 19;
+ (MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_MS_8BITS) & 0xFC)
+ << ptp->nic_time.sync_event_minor_shift;
+
/* if sync events have been disabled then we want to silently ignore
* this event, so throw away result.
*/
@@ -1719,15 +1958,6 @@ void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev)
SYNC_EVENTS_VALID);
}
-/* make some assumptions about the time representation rather than abstract it,
- * since we currently only support one type of inline timestamping and only on
- * EF10.
- */
-#define MINOR_TICKS_PER_SECOND 0x8000000
-/* Fuzz factor for sync events to be out of order with RX events */
-#define FUZZ (MINOR_TICKS_PER_SECOND / 10)
-#define EXPECTED_SYNC_EVENTS_PER_SECOND 4
-
static inline u32 efx_rx_buf_timestamp_minor(struct efx_nic *efx, const u8 *eh)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -1745,31 +1975,33 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
struct sk_buff *skb)
{
struct efx_nic *efx = channel->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
u32 pkt_timestamp_major, pkt_timestamp_minor;
u32 diff, carry;
struct skb_shared_hwtstamps *timestamps;
- pkt_timestamp_minor = (efx_rx_buf_timestamp_minor(efx,
- skb_mac_header(skb)) +
- (u32) efx->ptp_data->ts_corrections.rx) &
- (MINOR_TICKS_PER_SECOND - 1);
+ if (channel->sync_events_state != SYNC_EVENTS_VALID)
+ return;
+
+ pkt_timestamp_minor = efx_rx_buf_timestamp_minor(efx, skb_mac_header(skb));
/* get the difference between the packet and sync timestamps,
* modulo one second
*/
- diff = (pkt_timestamp_minor - channel->sync_timestamp_minor) &
- (MINOR_TICKS_PER_SECOND - 1);
+ diff = pkt_timestamp_minor - channel->sync_timestamp_minor;
+ if (pkt_timestamp_minor < channel->sync_timestamp_minor)
+ diff += ptp->nic_time.minor_max;
+
/* do we roll over a second boundary and need to carry the one? */
- carry = channel->sync_timestamp_minor + diff > MINOR_TICKS_PER_SECOND ?
+ carry = (channel->sync_timestamp_minor >= ptp->nic_time.minor_max - diff) ?
1 : 0;
- if (diff <= MINOR_TICKS_PER_SECOND / EXPECTED_SYNC_EVENTS_PER_SECOND +
- FUZZ) {
+ if (diff <= ptp->nic_time.sync_event_diff_max) {
/* packet is ahead of the sync event by a quarter of a second or
* less (allowing for fuzz)
*/
pkt_timestamp_major = channel->sync_timestamp_major + carry;
- } else if (diff >= MINOR_TICKS_PER_SECOND - FUZZ) {
+ } else if (diff >= ptp->nic_time.sync_event_diff_min) {
/* packet is behind the sync event but within the fuzz factor.
* This means the RX packet and sync event crossed as they were
* placed on the event queue, which can sometimes happen.
@@ -1791,7 +2023,9 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
/* attach the timestamps to the skb */
timestamps = skb_hwtstamps(skb);
timestamps->hwtstamp =
- efx_ptp_s27_to_ktime(pkt_timestamp_major, pkt_timestamp_minor);
+ ptp->nic_to_kernel_time(pkt_timestamp_major,
+ pkt_timestamp_minor,
+ ptp->ts_corrections.general_rx);
}
static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
@@ -1809,9 +2043,10 @@ static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
else if (delta < -MAX_PPB)
delta = -MAX_PPB;
- /* Convert ppb to fixed point ns. */
- adjustment_ns = (((s64)delta * PPB_SCALE_WORD) >>
- (PPB_EXTRA_BITS + MAX_PPB_BITS));
+ /* Convert ppb to fixed point ns taking care to round correctly. */
+ adjustment_ns = ((s64)delta * PPB_SCALE_WORD +
+ (1 << (ptp_data->adjfreq_ppb_shift - 1))) >>
+ ptp_data->adjfreq_ppb_shift;
MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST);
MCDI_SET_DWORD(inadj, PTP_IN_PERIPH_ID, 0);
@@ -1911,13 +2146,14 @@ static int efx_phc_enable(struct ptp_clock_info *ptp,
return 0;
}
-static const struct efx_channel_type efx_ptp_channel_type = {
+const struct efx_channel_type efx_ptp_channel_type = {
.handle_no_channel = efx_ptp_handle_no_channel,
.pre_probe = efx_ptp_probe_channel,
.post_remove = efx_ptp_remove_channel,
.get_name = efx_ptp_get_channel_name,
/* no copy operation; there is no need to reallocate this channel */
.receive_skb = efx_ptp_rx,
+ .want_txqs = efx_ptp_want_txqs,
.keep_eventq = false,
};
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 9937a24..cece961 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -77,9 +77,23 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
}
if (buffer->flags & EFX_TX_BUF_SKB) {
+ struct sk_buff *skb = (struct sk_buff *)buffer->skb;
+
EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
(*pkts_compl)++;
- (*bytes_compl) += buffer->skb->len;
+ (*bytes_compl) += skb->len;
+ if (tx_queue->timestamping &&
+ (tx_queue->completed_timestamp_major ||
+ tx_queue->completed_timestamp_minor)) {
+ struct skb_shared_hwtstamps hwtstamp;
+
+ hwtstamp.hwtstamp =
+ efx_ptp_nic_to_kernel_time(tx_queue);
+ skb_tstamp_tx(skb, &hwtstamp);
+
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+ }
dev_consume_skb_any((struct sk_buff *)buffer->skb);
netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
"TX queue %d transmission id %x complete\n",
@@ -828,6 +842,11 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
tx_queue->old_read_count = 0;
tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
tx_queue->xmit_more_available = false;
+ tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
+ tx_queue->channel == efx_ptp_channel(efx));
+ tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
/* Set up default function pointers. These may get replaced by
* efx_nic_init_tx() based off NIC/queue capabilities.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 2fd8456..c728ffa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -334,7 +334,7 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
if (tx_own)
tdes3 |= TDES3_OWN;
- if (is_fs & tx_own)
+ if (is_fs && tx_own)
/* When the own bit, for the first frame, has to be set, all
* descriptors for the same frame has to be set before, to
* avoid race condition.
@@ -377,7 +377,7 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
if (tx_own)
tdes3 |= TDES3_OWN;
- if (is_fs & tx_own)
+ if (is_fs && tx_own)
/* When the own bit, for the first frame, has to be set, all
* descriptors for the same frame has to be set before, to
* avoid race condition.
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index b47cb5c..6768a25 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -341,7 +341,7 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
if (tx_own)
tdes0 |= ETDES0_OWN;
- if (is_fs & tx_own)
+ if (is_fs && tx_own)
/* When the own bit, for the first frame, has to be set, all
* descriptors for the same frame has to be set before, to
* avoid race condition.
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index 074ddeb..09388c0 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -4,4 +4,8 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o
netdevsim-objs := \
netdev.o \
- bpf.o \
+
+ifeq ($(CONFIG_BPF_SYSCALL),y)
+netdevsim-objs += \
+ bpf.o
+endif
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index b3851bb..de73c1f 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -23,6 +23,9 @@
#include "netdevsim.h"
+#define pr_vlog(env, fmt, ...) \
+ bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__)
+
struct nsim_bpf_bound_prog {
struct netdevsim *ns;
struct bpf_prog *prog;
@@ -77,6 +80,9 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
if (state->ns->bpf_bind_verifier_delay && !insn_idx)
msleep(state->ns->bpf_bind_verifier_delay);
+ if (insn_idx == env->prog->len - 1)
+ pr_vlog(env, "Hello from netdevsim!\n");
+
return 0;
}
@@ -123,17 +129,32 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
struct netdevsim *ns = cb_priv;
struct bpf_prog *oldprog;
- if (type != TC_SETUP_CLSBPF ||
- !tc_can_offload(ns->netdev) ||
- cls_bpf->common.protocol != htons(ETH_P_ALL) ||
- cls_bpf->common.chain_index)
+ if (type != TC_SETUP_CLSBPF) {
+ NSIM_EA(cls_bpf->common.extack,
+ "only offload of BPF classifiers supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!tc_cls_can_offload_and_chain0(ns->netdev, &cls_bpf->common))
return -EOPNOTSUPP;
- if (!ns->bpf_tc_accept)
+ if (cls_bpf->common.protocol != htons(ETH_P_ALL)) {
+ NSIM_EA(cls_bpf->common.extack,
+ "only ETH_P_ALL supported as filter protocol");
return -EOPNOTSUPP;
+ }
+
+ if (!ns->bpf_tc_accept) {
+ NSIM_EA(cls_bpf->common.extack,
+ "netdevsim configured to reject BPF TC offload");
+ return -EOPNOTSUPP;
+ }
/* Note: progs without skip_sw will probably not be dev bound */
- if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept)
+ if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) {
+ NSIM_EA(cls_bpf->common.extack,
+ "netdevsim configured to reject unbound programs");
return -EOPNOTSUPP;
+ }
if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
return -EOPNOTSUPP;
@@ -145,8 +166,11 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
oldprog = NULL;
if (!cls_bpf->prog)
return 0;
- if (ns->bpf_offloaded)
+ if (ns->bpf_offloaded) {
+ NSIM_EA(cls_bpf->common.extack,
+ "driver and netdev offload states mismatch");
return -EBUSY;
+ }
}
return nsim_bpf_offload(ns, cls_bpf->prog, oldprog);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index b803612..ea081c1 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -68,12 +68,40 @@ struct netdevsim {
extern struct dentry *nsim_ddir;
+#ifdef CONFIG_BPF_SYSCALL
int nsim_bpf_init(struct netdevsim *ns);
void nsim_bpf_uninit(struct netdevsim *ns);
int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf);
int nsim_bpf_disable_tc(struct netdevsim *ns);
int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
void *type_data, void *cb_priv);
+#else
+static inline int nsim_bpf_init(struct netdevsim *ns)
+{
+ return 0;
+}
+
+static inline void nsim_bpf_uninit(struct netdevsim *ns)
+{
+}
+
+static inline int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ return bpf->command == XDP_QUERY_PROG ? 0 : -EOPNOTSUPP;
+}
+
+static inline int nsim_bpf_disable_tc(struct netdevsim *ns)
+{
+ return 0;
+}
+
+static inline int
+nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ return -EOPNOTSUPP;
+}
+#endif
static inline struct netdevsim *to_nsim(struct device *ptr)
{
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index bdc4bb3..8961209 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(sfp_upstream_stop);
/**
* sfp_register_upstream() - Register the neighbouring device
- * @np: device node for the SFP bus
+ * @fwnode: firmware node for the SFP bus
* @ndev: network device associated with the interface
* @upstream: the upstream private data
* @ops: the upstream's &struct sfp_upstream_ops
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 4e1da16..5aa59f4 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
struct pppoe_hdr *ph;
struct net_device *dev;
char *start;
+ int hlen;
lock_sock(sk);
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
@@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
if (total_len > (dev->mtu + dev->hard_header_len))
goto end;
-
- skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32,
- 0, GFP_KERNEL);
+ hlen = LL_RESERVED_SPACE(dev);
+ skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len +
+ dev->needed_tailroom, 0, GFP_KERNEL);
if (!skb) {
error = -ENOMEM;
goto end;
}
/* Reserve space for headers. */
- skb_reserve(skb, dev->hard_header_len);
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
skb->dev = dev;
@@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
/* Copy the data if there is no space for the header or if it's
* read-only.
*/
- if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len))
+ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph)))
goto abort;
__skb_push(skb, sizeof(*ph));
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index d56fe32..8a22ff6 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -457,12 +457,10 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb,
void usbnet_defer_kevent (struct usbnet *dev, int work)
{
set_bit (work, &dev->flags);
- if (!schedule_work (&dev->kevent)) {
- if (net_ratelimit())
- netdev_err(dev->net, "kevent %d may have been dropped\n", work);
- } else {
+ if (!schedule_work (&dev->kevent))
+ netdev_dbg(dev->net, "kevent %d may have been dropped\n", work);
+ else
netdev_dbg(dev->net, "kevent %d scheduled\n", work);
- }
}
EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d1c7029..cf95290 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
rq->rx_ring[i].basePA);
rq->rx_ring[i].base = NULL;
}
- rq->buf_info[i] = NULL;
}
if (rq->data_ring.base) {
@@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
(rq->rx_ring[0].size + rq->rx_ring[1].size);
dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
rq->buf_info_pa);
+ rq->buf_info[0] = rq->buf_info[1] = NULL;
}
}
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 58476b7..c940685 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -486,15 +486,28 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type,
int sas_eh_abort_handler(struct scsi_cmnd *cmd)
{
- int res;
+ int res = TMF_RESP_FUNC_FAILED;
struct sas_task *task = TO_SAS_TASK(cmd);
struct Scsi_Host *host = cmd->device->host;
+ struct domain_device *dev = cmd_to_domain_dev(cmd);
struct sas_internal *i = to_sas_internal(host->transportt);
+ unsigned long flags;
if (!i->dft->lldd_abort_task)
return FAILED;
- res = i->dft->lldd_abort_task(task);
+ spin_lock_irqsave(host->host_lock, flags);
+ /* We cannot do async aborts for SATA devices */
+ if (dev_is_sata(dev) && !host->host_eh_scheduled) {
+ spin_unlock_irqrestore(host->host_lock, flags);
+ return FAILED;
+ }
+ spin_unlock_irqrestore(host->host_lock, flags);
+
+ if (task)
+ res = i->dft->lldd_abort_task(task);
+ else
+ SAS_DPRINTK("no task to abort\n");
if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE)
return SUCCESS;
diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 1bef398..3c573a8 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -225,6 +225,18 @@ void serdev_device_set_flow_control(struct serdev_device *serdev, bool enable)
}
EXPORT_SYMBOL_GPL(serdev_device_set_flow_control);
+int serdev_device_set_parity(struct serdev_device *serdev,
+ enum serdev_parity parity)
+{
+ struct serdev_controller *ctrl = serdev->ctrl;
+
+ if (!ctrl || !ctrl->ops->set_parity)
+ return -ENOTSUPP;
+
+ return ctrl->ops->set_parity(ctrl, parity);
+}
+EXPORT_SYMBOL_GPL(serdev_device_set_parity);
+
void serdev_device_wait_until_sent(struct serdev_device *serdev, long timeout)
{
struct serdev_controller *ctrl = serdev->ctrl;
diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
index 247788a..1bdf295 100644
--- a/drivers/tty/serdev/serdev-ttyport.c
+++ b/drivers/tty/serdev/serdev-ttyport.c
@@ -190,6 +190,29 @@ static void ttyport_set_flow_control(struct serdev_controller *ctrl, bool enable
tty_set_termios(tty, &ktermios);
}
+static int ttyport_set_parity(struct serdev_controller *ctrl,
+ enum serdev_parity parity)
+{
+ struct serport *serport = serdev_controller_get_drvdata(ctrl);
+ struct tty_struct *tty = serport->tty;
+ struct ktermios ktermios = tty->termios;
+
+ ktermios.c_cflag &= ~(PARENB | PARODD | CMSPAR);
+ if (parity != SERDEV_PARITY_NONE) {
+ ktermios.c_cflag |= PARENB;
+ if (parity == SERDEV_PARITY_ODD)
+ ktermios.c_cflag |= PARODD;
+ }
+
+ tty_set_termios(tty, &ktermios);
+
+ if ((tty->termios.c_cflag & (PARENB | PARODD | CMSPAR)) !=
+ (ktermios.c_cflag & (PARENB | PARODD | CMSPAR)))
+ return -EINVAL;
+
+ return 0;
+}
+
static void ttyport_wait_until_sent(struct serdev_controller *ctrl, long timeout)
{
struct serport *serport = serdev_controller_get_drvdata(ctrl);
@@ -227,6 +250,7 @@ static const struct serdev_controller_ops ctrl_ops = {
.open = ttyport_open,
.close = ttyport_close,
.set_flow_control = ttyport_set_flow_control,
+ .set_parity = ttyport_set_parity,
.set_baudrate = ttyport_set_baudrate,
.wait_until_sent = ttyport_wait_until_sent,
.get_tiocm = ttyport_get_tiocm,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 33ac2b1..5727b18 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -904,7 +904,7 @@ static void vhost_dev_lock_vqs(struct vhost_dev *d)
{
int i = 0;
for (i = 0; i < d->nvqs; ++i)
- mutex_lock(&d->vqs[i]->mutex);
+ mutex_lock_nested(&d->vqs[i]->mutex, i);
}
static void vhost_dev_unlock_vqs(struct vhost_dev *d)
@@ -1015,6 +1015,10 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
vhost_iotlb_notify_vq(dev, msg);
break;
case VHOST_IOTLB_INVALIDATE:
+ if (!dev->iotlb) {
+ ret = -EFAULT;
+ break;
+ }
vhost_vq_meta_reset(dev);
vhost_del_umem_range(dev->iotlb, msg->iova,
msg->iova + msg->size - 1);
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index f650e47..fdf2aad 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -60,10 +60,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
gi->gid[i] = exp->ex_anon_gid;
else
gi->gid[i] = rqgi->gid[i];
-
- /* Each thread allocates its own gi, no race */
- groups_sort(gi);
}
+
+ /* Each thread allocates its own gi, no race */
+ groups_sort(gi);
} else {
gi = get_group_info(rqgi);
}
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index ded456f..c584ad8 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -162,7 +162,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
struct orangefs_kernel_op_s *op, *temp;
__s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
- struct orangefs_kernel_op_s *cur_op = NULL;
+ struct orangefs_kernel_op_s *cur_op;
unsigned long ret;
/* We do not support blocking IO. */
@@ -186,6 +186,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
return -EAGAIN;
restart:
+ cur_op = NULL;
/* Get next op (if any) from top of list. */
spin_lock(&orangefs_request_list_lock);
list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index 835c6e1..0577d6d 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -29,10 +29,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
*/
void purge_waiting_ops(void)
{
- struct orangefs_kernel_op_s *op;
+ struct orangefs_kernel_op_s *op, *tmp;
spin_lock(&orangefs_request_list_lock);
- list_for_each_entry(op, &orangefs_request_list, list) {
+ list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) {
gossip_debug(GOSSIP_WAIT_DEBUG,
"pvfs2-client-core: purging op tag %llu %s\n",
llu(op->tag),
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2bab819..3319df9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -332,6 +332,8 @@ extern int ftrace_text_reserved(const void *start, const void *end);
extern int ftrace_nr_registered_ops(void);
+struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr);
+
bool is_ftrace_trampoline(unsigned long addr);
/*
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 1ac5bf9..e16fe7d 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -173,7 +173,7 @@ static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
}
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
-int devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
+int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *);
void devinet_init(void);
struct in_device *inetdev_by_index(struct net *, int);
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
diff --git a/include/linux/net.h b/include/linux/net.h
index caeb159..68acc54 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -306,7 +306,6 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 837e9cb..cd46d3d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -851,6 +851,7 @@ struct xfrmdev_ops {
void (*xdo_dev_state_free) (struct xfrm_state *x);
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
+ void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
};
#endif
@@ -1469,8 +1470,6 @@ enum netdev_priv_flags {
* @base_addr: Device I/O address
* @irq: Device IRQ number
*
- * @carrier_changes: Stats to monitor carrier on<->off transitions
- *
* @state: Generic network queuing layer state, see netdev_state_t
* @dev_list: The global list of network devices
* @napi_list: List entry used for polling NAPI devices
@@ -1506,6 +1505,8 @@ enum netdev_priv_flags {
* do not use this in drivers
* @rx_nohandler: nohandler dropped packets by core network on
* inactive devices, do not use this in drivers
+ * @carrier_up_count: Number of times the carrier has been up
+ * @carrier_down_count: Number of times the carrier has been down
*
* @wireless_handlers: List of functions to handle Wireless Extensions,
* instead of ioctl,
@@ -2761,7 +2762,8 @@ static inline bool dev_validate_header(const struct net_device *dev,
return false;
}
-typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
+typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr,
+ int len, int size);
int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
static inline int unregister_gifconf(unsigned int family)
{
@@ -3314,7 +3316,9 @@ int netdev_rx_handler_register(struct net_device *dev,
void netdev_rx_handler_unregister(struct net_device *dev);
bool dev_valid_name(const char *name);
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
+ bool *need_copyout);
+int dev_ifconf(struct net *net, struct ifconf *, int);
int dev_ethtool(struct net *net, struct ifreq *);
unsigned int dev_get_flags(const struct net_device *);
int __dev_change_flags(struct net_device *, unsigned int flags);
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index d609e6d..a73d87b 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -76,6 +76,12 @@ static inline struct serdev_device_driver *to_serdev_device_driver(struct device
return container_of(d, struct serdev_device_driver, driver);
}
+enum serdev_parity {
+ SERDEV_PARITY_NONE,
+ SERDEV_PARITY_EVEN,
+ SERDEV_PARITY_ODD,
+};
+
/*
* serdev controller structures
*/
@@ -86,6 +92,7 @@ struct serdev_controller_ops {
int (*open)(struct serdev_controller *);
void (*close)(struct serdev_controller *);
void (*set_flow_control)(struct serdev_controller *, bool);
+ int (*set_parity)(struct serdev_controller *, enum serdev_parity);
unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int);
void (*wait_until_sent)(struct serdev_controller *, long);
int (*get_tiocm)(struct serdev_controller *);
@@ -298,6 +305,9 @@ static inline int serdev_device_set_rts(struct serdev_device *serdev, bool enabl
return serdev_device_set_tiocm(serdev, 0, TIOCM_RTS);
}
+int serdev_device_set_parity(struct serdev_device *serdev,
+ enum serdev_parity parity);
+
/*
* serdev hooks into TTY core
*/
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 9c5a262..1d3877c 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
}
+static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+{
+ return swp_offset(entry);
+}
+
static inline struct page *device_private_entry_to_page(swp_entry_t entry)
{
return pfn_to_page(swp_offset(entry));
@@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
return false;
}
+static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+{
+ return 0;
+}
+
static inline struct page *device_private_entry_to_page(swp_entry_t entry)
{
return NULL;
@@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
}
+static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+{
+ return swp_offset(entry);
+}
+
static inline struct page *migration_entry_to_page(swp_entry_t entry)
{
struct page *p = pfn_to_page(swp_offset(entry));
@@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp)
{
return 0;
}
+
+static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+{
+ return 0;
+}
+
static inline struct page *migration_entry_to_page(swp_entry_t entry)
{
return NULL;
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 8532917..bae807e 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -31,17 +31,11 @@
#else
#define MODULE_RANDSTRUCT_PLUGIN
#endif
-#ifdef RETPOLINE
-#define MODULE_VERMAGIC_RETPOLINE "retpoline "
-#else
-#define MODULE_VERMAGIC_RETPOLINE ""
-#endif
#define VERMAGIC_STRING \
UTS_RELEASE " " \
MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \
MODULE_ARCH_VERMAGIC \
- MODULE_RANDSTRUCT_PLUGIN \
- MODULE_VERMAGIC_RETPOLINE
+ MODULE_RANDSTRUCT_PLUGIN
diff --git a/include/net/erspan.h b/include/net/erspan.h
index acdf684..5daa486 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -46,6 +46,8 @@
* GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
*/
+#include <uapi/linux/erspan.h>
+
#define ERSPAN_VERSION 0x1 /* ERSPAN type II */
#define VER_MASK 0xf000
#define VLAN_MASK 0x0fff
@@ -65,17 +67,8 @@
#define GRA_MASK 0x0006
#define O_MASK 0x0001
-/* ERSPAN version 2 metadata header */
-struct erspan_md2 {
- __be32 timestamp;
- __be16 sgt; /* security group tag */
- __be16 flags;
-#define P_OFFSET 15
-#define FT_OFFSET 10
-#define HWID_OFFSET 4
-#define DIR_OFFSET 3
-#define GRA_OFFSET 1
-};
+#define HWID_OFFSET 4
+#define DIR_OFFSET 3
enum erspan_encap_type {
ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
@@ -86,24 +79,64 @@ enum erspan_encap_type {
#define ERSPAN_V1_MDSIZE 4
#define ERSPAN_V2_MDSIZE 8
-struct erspan_metadata {
- union {
- __be32 index; /* Version 1 (type II)*/
- struct erspan_md2 md2; /* Version 2 (type III) */
- } u;
- int version;
-};
struct erspan_base_hdr {
- __be16 ver_vlan;
-#define VER_OFFSET 12
- __be16 session_id;
-#define COS_OFFSET 13
-#define EN_OFFSET 11
-#define BSO_OFFSET EN_OFFSET
-#define T_OFFSET 10
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 vlan_upper:4,
+ ver:4;
+ __u8 vlan:8;
+ __u8 session_id_upper:2,
+ t:1,
+ en:2,
+ cos:3;
+ __u8 session_id:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 ver: 4,
+ vlan_upper:4;
+ __u8 vlan:8;
+ __u8 cos:3,
+ en:2,
+ t:1,
+ session_id_upper:2;
+ __u8 session_id:8;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
};
+static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id)
+{
+ ershdr->session_id = id & 0xff;
+ ershdr->session_id_upper = (id >> 8) & 0x3;
+}
+
+static inline u16 get_session_id(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->session_id_upper << 8) + ershdr->session_id;
+}
+
+static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan)
+{
+ ershdr->vlan = vlan & 0xff;
+ ershdr->vlan_upper = (vlan >> 8) & 0xf;
+}
+
+static inline u16 get_vlan(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->vlan_upper << 8) + ershdr->vlan;
+}
+
+static inline void set_hwid(struct erspan_md2 *md2, u8 hwid)
+{
+ md2->hwid = hwid & 0xf;
+ md2->hwid_upper = (hwid >> 4) & 0x3;
+}
+
+static inline u8 get_hwid(const struct erspan_md2 *md2)
+{
+ return (md2->hwid_upper << 4) + md2->hwid;
+}
+
static inline int erspan_hdr_len(int version)
{
return sizeof(struct erspan_base_hdr) +
@@ -120,10 +153,10 @@ static inline u8 tos_to_cos(u8 tos)
}
static inline void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index,
+ u32 id, u32 index,
bool truncate, bool is_ipv4)
{
- struct ethhdr *eth = eth_hdr(skb);
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
enum erspan_encap_type enc_type;
struct erspan_base_hdr *ershdr;
struct erspan_metadata *ersmd;
@@ -154,12 +187,12 @@ static inline void erspan_build_header(struct sk_buff *skb,
memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
/* Build base header */
- ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
- (ERSPAN_VERSION << VER_OFFSET));
- ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
- ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) |
- (enc_type << EN_OFFSET & EN_MASK) |
- ((truncate << T_OFFSET) & T_MASK));
+ ershdr->ver = ERSPAN_VERSION;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = enc_type;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
/* Build metadata */
ersmd = (struct erspan_metadata *)(ershdr + 1);
@@ -187,10 +220,10 @@ static inline __be32 erspan_get_timestamp(void)
}
static inline void erspan_build_header_v2(struct sk_buff *skb,
- __be32 id, u8 direction, u16 hwid,
+ u32 id, u8 direction, u16 hwid,
bool truncate, bool is_ipv4)
{
- struct ethhdr *eth = eth_hdr(skb);
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
struct erspan_base_hdr *ershdr;
struct erspan_metadata *md;
struct qtag_prefix {
@@ -198,7 +231,6 @@ static inline void erspan_build_header_v2(struct sk_buff *skb,
__be16 tci;
} *qp;
u16 vlan_tci = 0;
- u16 session_id;
u8 gra = 0; /* 100 usec */
u8 bso = 0; /* Bad/Short/Oversized */
u8 sgt = 0;
@@ -221,22 +253,23 @@ static inline void erspan_build_header_v2(struct sk_buff *skb,
memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
/* Build base header */
- ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
- (ERSPAN_VERSION2 << VER_OFFSET));
- session_id = (u16)(ntohl(id) & ID_MASK) |
- ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) |
- (bso << BSO_OFFSET & BSO_MASK) |
- ((truncate << T_OFFSET) & T_MASK);
- ershdr->session_id = htons(session_id);
+ ershdr->ver = ERSPAN_VERSION2;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = bso;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
/* Build metadata */
md = (struct erspan_metadata *)(ershdr + 1);
md->u.md2.timestamp = erspan_get_timestamp();
md->u.md2.sgt = htons(sgt);
- md->u.md2.flags = htons(((1 << P_OFFSET) & P_MASK) |
- ((hwid << HWID_OFFSET) & HWID_MASK) |
- ((direction << DIR_OFFSET) & DIR_MASK) |
- ((gra << GRA_OFFSET) & GRA_MASK));
+ md->u.md2.p = 1;
+ md->u.md2.ft = 0;
+ md->u.md2.dir = direction;
+ md->u.md2.gra = gra;
+ md->u.md2.o = 0;
+ set_hwid(&md->u.md2, hwid);
}
#endif
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9dc1230..8606c91 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -332,6 +332,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 2f8f16a..8740625 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -535,7 +535,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
{
switch (layer) {
case TCF_LAYER_LINK:
- return skb->data;
+ return skb_mac_header(skb);
case TCF_LAYER_NETWORK:
return skb_network_header(skb);
case TCF_LAYER_TRANSPORT:
@@ -605,17 +605,6 @@ struct tc_cls_common_offload {
struct netlink_ext_ack *extack;
};
-static inline void
-tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
- const struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
-{
- cls_common->chain_index = tp->chain->index;
- cls_common->protocol = tp->protocol;
- cls_common->prio = tp->prio;
- cls_common->extack = extack;
-}
-
struct tc_cls_u32_knode {
struct tcf_exts *exts;
struct tc_u32_sel *sel;
@@ -667,6 +656,20 @@ static inline bool tc_can_offload_extack(const struct net_device *dev,
return can;
}
+static inline bool
+tc_cls_can_offload_and_chain0(const struct net_device *dev,
+ struct tc_cls_common_offload *common)
+{
+ if (!tc_can_offload_extack(dev, common->extack))
+ return false;
+ if (common->chain_index) {
+ NL_SET_ERR_MSG(common->extack,
+ "Driver supports only offload of chain 0");
+ return false;
+ }
+ return true;
+}
+
static inline bool tc_skip_hw(u32 flags)
{
return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
@@ -694,6 +697,18 @@ static inline bool tc_in_hw(u32 flags)
return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false;
}
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+ const struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ cls_common->chain_index = tp->chain->index;
+ cls_common->protocol = tp->protocol;
+ cls_common->prio = tp->prio;
+ if (tc_skip_sw(flags))
+ cls_common->extack = extack;
+}
+
enum tc_fl_command {
TC_CLSFLOWER_REPLACE,
TC_CLSFLOWER_DESTROY,
@@ -736,7 +751,6 @@ struct tc_cls_bpf_offload {
struct bpf_prog *oldprog;
const char *name;
bool exts_integrated;
- u32 gen_flags;
};
struct tc_mqprio_qopt_offload {
diff --git a/include/net/route.h b/include/net/route.h
index d538e6d..1eb9ce4 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -217,7 +217,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr);
void ip_rt_multicast_event(struct in_device *);
-int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);
+int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index cd1be1f..eac43e8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -233,7 +233,8 @@ struct tcf_proto_ops {
const struct tcf_proto *,
struct tcf_result *);
int (*init)(struct tcf_proto*);
- void (*destroy)(struct tcf_proto*);
+ void (*destroy)(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack);
void* (*get)(struct tcf_proto*, u32 handle);
int (*change)(struct net *net, struct sk_buff *,
diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index 781f343..9470fd7 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -6,10 +6,16 @@
#include <net/act_api.h>
#include <linux/tc_act/tc_csum.h>
+struct tcf_csum_params {
+ int action;
+ u32 update_flags;
+ struct rcu_head rcu;
+};
+
struct tcf_csum {
struct tc_action common;
- u32 update_flags;
+ struct tcf_csum_params __rcu *params;
};
#define to_tcf_csum(a) ((struct tcf_csum *)a)
@@ -24,7 +30,13 @@ static inline bool is_tcf_csum(const struct tc_action *a)
static inline u32 tcf_csum_update_flags(const struct tc_action *a)
{
- return to_tcf_csum(a)->update_flags;
+ u32 update_flags;
+
+ rcu_read_lock();
+ update_flags = rcu_dereference(to_tcf_csum(a)->params)->update_flags;
+ rcu_read_unlock();
+
+ return update_flags;
}
#endif /* __NET_TC_CSUM_H */
diff --git a/include/net/wext.h b/include/net/wext.h
index e51f067..aa192a6 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -7,7 +7,7 @@
struct net;
#ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg);
int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
unsigned long arg);
@@ -15,7 +15,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
struct iw_statistics *get_wireless_stats(struct net_device *dev);
int call_commit_handler(struct net_device *dev);
#else
-static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg)
{
return -EINVAL;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2e6d4fe..7d20776 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1904,6 +1904,14 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+ struct xfrm_state_offload *xso = &x->xso;
+
+ if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn)
+ xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
@@ -1974,6 +1982,10 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x
return false;
}
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
return false;
diff --git a/include/uapi/linux/erspan.h b/include/uapi/linux/erspan.h
new file mode 100644
index 0000000..8415730
--- /dev/null
+++ b/include/uapi/linux/erspan.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ERSPAN Tunnel Metadata
+ *
+ * Copyright (c) 2018 VMware
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * Userspace API for metadata mode ERSPAN tunnel
+ */
+#ifndef _UAPI_ERSPAN_H
+#define _UAPI_ERSPAN_H
+
+#include <linux/types.h> /* For __beXX in userspace */
+#include <asm/byteorder.h>
+
+/* ERSPAN version 2 metadata header */
+struct erspan_md2 {
+ __be32 timestamp;
+ __be16 sgt; /* security group tag */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hwid_upper:2,
+ ft:5,
+ p:1;
+ __u8 o:1,
+ gra:2,
+ dir:1,
+ hwid:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 p:1,
+ ft:5,
+ hwid_upper:2;
+ __u8 hwid:4,
+ dir:1,
+ gra:2,
+ o:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+};
+
+struct erspan_metadata {
+ int version;
+ union {
+ __be32 index; /* Version 1 (type II)*/
+ struct erspan_md2 md2; /* Version 2 (type III) */
+ } u;
+};
+
+#endif /* _UAPI_ERSPAN_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 496e59a..8fb90a0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -932,6 +932,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_SYNIC2 148
#define KVM_CAP_HYPERV_VP_INDEX 149
#define KVM_CAP_S390_AIS_MIGRATION 150
+#define KVM_CAP_PPC_GET_CPU_CHAR 151
+#define KVM_CAP_S390_BPB 152
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1261,6 +1263,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
/* Available with KVM_CAP_PPC_RADIX_MMU */
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
+/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
+#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index dcfab5e..713e56c 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -363,6 +363,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */
OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */
OVS_TUNNEL_KEY_ATTR_PAD,
+ OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */
__OVS_TUNNEL_KEY_ATTR_MAX
};
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 0ba0dd8..5187dfe 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -321,15 +321,23 @@ void irq_matrix_remove_reserved(struct irq_matrix *m)
int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
bool reserved, unsigned int *mapped_cpu)
{
- unsigned int cpu;
+ unsigned int cpu, best_cpu, maxavl = 0;
+ struct cpumap *cm;
+ unsigned int bit;
+ best_cpu = UINT_MAX;
for_each_cpu(cpu, msk) {
- struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
- unsigned int bit;
+ cm = per_cpu_ptr(m->maps, cpu);
- if (!cm->online)
+ if (!cm->online || cm->available <= maxavl)
continue;
+ best_cpu = cpu;
+ maxavl = cm->available;
+ }
+
+ if (maxavl) {
+ cm = per_cpu_ptr(m->maps, best_cpu);
bit = matrix_alloc_area(m, cm, 1, false);
if (bit < m->alloc_end) {
cm->allocated++;
@@ -338,8 +346,8 @@ int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
m->global_available--;
if (reserved)
m->global_reserved--;
- *mapped_cpu = cpu;
- trace_irq_matrix_alloc(bit, cpu, m, cm);
+ *mapped_cpu = best_cpu;
+ trace_irq_matrix_alloc(bit, best_cpu, m, cm);
return bit;
}
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ccdf366..554b517 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1119,15 +1119,11 @@ static struct ftrace_ops global_ops = {
};
/*
- * This is used by __kernel_text_address() to return true if the
- * address is on a dynamically allocated trampoline that would
- * not return true for either core_kernel_text() or
- * is_module_text_address().
+ * Used by the stack undwinder to know about dynamic ftrace trampolines.
*/
-bool is_ftrace_trampoline(unsigned long addr)
+struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr)
{
- struct ftrace_ops *op;
- bool ret = false;
+ struct ftrace_ops *op = NULL;
/*
* Some of the ops may be dynamically allocated,
@@ -1144,15 +1140,24 @@ bool is_ftrace_trampoline(unsigned long addr)
if (op->trampoline && op->trampoline_size)
if (addr >= op->trampoline &&
addr < op->trampoline + op->trampoline_size) {
- ret = true;
- goto out;
+ preempt_enable_notrace();
+ return op;
}
} while_for_each_ftrace_op(op);
-
- out:
preempt_enable_notrace();
- return ret;
+ return NULL;
+}
+
+/*
+ * This is used by __kernel_text_address() to return true if the
+ * address is on a dynamically allocated trampoline that would
+ * not return true for either core_kernel_text() or
+ * is_module_text_address().
+ */
+bool is_ftrace_trampoline(unsigned long addr)
+{
+ return ftrace_ops_trampoline(addr) != NULL;
}
struct ftrace_page {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a8d8a2..8e3f20a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2374,6 +2374,15 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
+/*
+ * Skip 3:
+ *
+ * trace_buffer_unlock_commit_regs()
+ * trace_event_buffer_commit()
+ * trace_event_raw_event_xxx()
+*/
+# define STACK_SKIP 3
+
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
struct ring_buffer *buffer,
struct ring_buffer_event *event,
@@ -2383,16 +2392,12 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
__buffer_unlock_commit(buffer, event);
/*
- * If regs is not set, then skip the following callers:
- * trace_buffer_unlock_commit_regs
- * event_trigger_unlock_commit
- * trace_event_buffer_commit
- * trace_event_raw_event_sched_switch
+ * If regs is not set, then skip the necessary functions.
* Note, we can still get here via blktrace, wakeup tracer
* and mmiotrace, but that's ok if they lose a function or
- * two. They are that meaningful.
+ * two. They are not that meaningful.
*/
- ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
+ ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
@@ -2579,11 +2584,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
trace.skip = skip;
/*
- * Add two, for this function and the call to save_stack_trace()
+ * Add one, for this function and the call to save_stack_trace()
* If regs is set, then these functions will not be in the way.
*/
+#ifndef CONFIG_UNWINDER_ORC
if (!regs)
- trace.skip += 2;
+ trace.skip++;
+#endif
/*
* Since events can happen in NMIs there's no safe way to
@@ -2711,11 +2718,10 @@ void trace_dump_stack(int skip)
local_save_flags(flags);
- /*
- * Skip 3 more, seems to get us at the caller of
- * this function.
- */
- skip += 3;
+#ifndef CONFIG_UNWINDER_ORC
+ /* Skip 1 to skip this function. */
+ skip++;
+#endif
__ftrace_trace_stack(global_trace.trace_buffer.buffer,
flags, skip, preempt_count(), NULL);
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index f2ac9d4..8741148 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1123,13 +1123,22 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
#endif /* CONFIG_TRACER_SNAPSHOT */
#ifdef CONFIG_STACKTRACE
+#ifdef CONFIG_UNWINDER_ORC
+/* Skip 2:
+ * event_triggers_post_call()
+ * trace_event_raw_event_xxx()
+ */
+# define STACK_SKIP 2
+#else
/*
- * Skip 3:
+ * Skip 4:
* stacktrace_trigger()
* event_triggers_post_call()
+ * trace_event_buffer_commit()
* trace_event_raw_event_xxx()
*/
-#define STACK_SKIP 3
+#define STACK_SKIP 4
+#endif
static void
stacktrace_trigger(struct event_trigger_data *data, void *rec)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 27f7ad1..b611cd3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -154,6 +154,24 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
preempt_enable_notrace();
}
+#ifdef CONFIG_UNWINDER_ORC
+/*
+ * Skip 2:
+ *
+ * function_stack_trace_call()
+ * ftrace_call()
+ */
+#define STACK_SKIP 2
+#else
+/*
+ * Skip 3:
+ * __trace_stack()
+ * function_stack_trace_call()
+ * ftrace_call()
+ */
+#define STACK_SKIP 3
+#endif
+
static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
@@ -180,15 +198,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (likely(disabled == 1)) {
pc = preempt_count();
trace_function(tr, ip, parent_ip, flags, pc);
- /*
- * skip over 5 funcs:
- * __ftrace_trace_stack,
- * __trace_stack,
- * function_stack_trace_call
- * ftrace_list_func
- * ftrace_call
- */
- __trace_stack(tr, flags, 5, pc);
+ __trace_stack(tr, flags, STACK_SKIP, pc);
}
atomic_dec(&data->disabled);
@@ -367,14 +377,27 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip,
tracer_tracing_off(tr);
}
+#ifdef CONFIG_UNWINDER_ORC
/*
- * Skip 4:
+ * Skip 3:
+ *
+ * function_trace_probe_call()
+ * ftrace_ops_assist_func()
+ * ftrace_call()
+ */
+#define FTRACE_STACK_SKIP 3
+#else
+/*
+ * Skip 5:
+ *
+ * __trace_stack()
* ftrace_stacktrace()
* function_trace_probe_call()
- * ftrace_ops_list_func()
+ * ftrace_ops_assist_func()
* ftrace_call()
*/
-#define STACK_SKIP 4
+#define FTRACE_STACK_SKIP 5
+#endif
static __always_inline void trace_stack(struct trace_array *tr)
{
@@ -384,7 +407,7 @@ static __always_inline void trace_stack(struct trace_array *tr)
local_save_flags(flags);
pc = preempt_count();
- __trace_stack(tr, flags, STACK_SKIP, pc);
+ __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc);
}
static void
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index d22b843..ae3c2a3 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -30,10 +30,37 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
return true;
}
+static inline bool pfn_in_hpage(struct page *hpage, unsigned long pfn)
+{
+ unsigned long hpage_pfn = page_to_pfn(hpage);
+
+ /* THP can be referenced by any subpage */
+ return pfn >= hpage_pfn && pfn - hpage_pfn < hpage_nr_pages(hpage);
+}
+
+/**
+ * check_pte - check if @pvmw->page is mapped at the @pvmw->pte
+ *
+ * page_vma_mapped_walk() found a place where @pvmw->page is *potentially*
+ * mapped. check_pte() has to validate this.
+ *
+ * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary
+ * page.
+ *
+ * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
+ * entry that points to @pvmw->page or any subpage in case of THP.
+ *
+ * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to
+ * @pvmw->page or any subpage in case of THP.
+ *
+ * Otherwise, return false.
+ *
+ */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
+ unsigned long pfn;
+
if (pvmw->flags & PVMW_MIGRATION) {
-#ifdef CONFIG_MIGRATION
swp_entry_t entry;
if (!is_swap_pte(*pvmw->pte))
return false;
@@ -41,38 +68,25 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
if (!is_migration_entry(entry))
return false;
- if (migration_entry_to_page(entry) - pvmw->page >=
- hpage_nr_pages(pvmw->page)) {
- return false;
- }
- if (migration_entry_to_page(entry) < pvmw->page)
- return false;
-#else
- WARN_ON_ONCE(1);
-#endif
- } else {
- if (is_swap_pte(*pvmw->pte)) {
- swp_entry_t entry;
- entry = pte_to_swp_entry(*pvmw->pte);
- if (is_device_private_entry(entry) &&
- device_private_entry_to_page(entry) == pvmw->page)
- return true;
- }
+ pfn = migration_entry_to_pfn(entry);
+ } else if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
- if (!pte_present(*pvmw->pte))
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_device_private_entry(entry))
return false;
- /* THP can be referenced by any subpage */
- if (pte_page(*pvmw->pte) - pvmw->page >=
- hpage_nr_pages(pvmw->page)) {
- return false;
- }
- if (pte_page(*pvmw->pte) < pvmw->page)
+ pfn = device_private_entry_to_pfn(entry);
+ } else {
+ if (!pte_present(*pvmw->pte))
return false;
+
+ pfn = pte_pfn(*pvmw->pte);
}
- return true;
+ return pfn_in_hpage(pvmw->page, pfn);
}
/**
diff --git a/net/can/Kconfig b/net/can/Kconfig
index a15c0e0..a4399be 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -11,7 +11,7 @@ menuconfig CAN
1991, mainly for automotive, but now widely used in marine
(NMEA2000), industrial, and medical applications.
More information on the CAN network protocol family PF_CAN
- is contained in <Documentation/networking/can.txt>.
+ is contained in <Documentation/networking/can.rst>.
If you want CAN support you should say Y here and also to the
specific driver for your controller(s) below.
diff --git a/net/core/dev.c b/net/core/dev.c
index 94435cd..4670cca 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1694,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
- * @dev: net_device pointer passed unmodified to notifier function
* @info: notifier information data
*
* Call all network notifier blocks. Parameters and return value
@@ -3167,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -6425,6 +6435,7 @@ rollback:
* netdev_upper_dev_link - Add a link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. The caller must hold
* the RTNL lock. On a failure a negative errno code is returned.
@@ -6446,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* @upper_dev: new upper device
* @upper_priv: upper device private
* @upper_info: upper info to be passed down via notifier
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7e690d0..0ab1af0 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -18,26 +18,10 @@
* match. --pb
*/
-static int dev_ifname(struct net *net, struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq *ifr)
{
- struct ifreq ifr;
- int error;
-
- /*
- * Fetch the caller's info block.
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
- ifr.ifr_name[IFNAMSIZ-1] = 0;
-
- error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
- if (error)
- return error;
-
- if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- return 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
+ return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
static gifconf_func_t *gifconf_list[NPROTO];
@@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf);
* Thus we will need a 'compatibility mode'.
*/
-static int dev_ifconf(struct net *net, char __user *arg)
+int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
{
- struct ifconf ifc;
struct net_device *dev;
char __user *pos;
int len;
@@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg)
* Fetch the caller's info block.
*/
- if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
- return -EFAULT;
-
- pos = ifc.ifc_buf;
- len = ifc.ifc_len;
+ pos = ifc->ifc_buf;
+ len = ifc->ifc_len;
/*
* Loop over the interfaces, and write an info block for each.
@@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg)
if (gifconf_list[i]) {
int done;
if (!pos)
- done = gifconf_list[i](dev, NULL, 0);
+ done = gifconf_list[i](dev, NULL, 0, size);
else
done = gifconf_list[i](dev, pos + total,
- len - total);
+ len - total, size);
if (done < 0)
return -EFAULT;
total += done;
@@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg)
/*
* All done. Write the updated control block back to the caller.
*/
- ifc.ifc_len = total;
+ ifc->ifc_len = total;
/*
* Both BSD and Solaris return 0 here, so we do too.
*/
- return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+ return 0;
}
/*
@@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load);
* positive or a negative errno code on error.
*/
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
{
- struct ifreq ifr;
int ret;
char *colon;
- /* One special case: SIOCGIFCONF takes ifconf argument
- and requires shared lock, because it sleeps writing
- to user space.
- */
-
- if (cmd == SIOCGIFCONF) {
- rtnl_lock();
- ret = dev_ifconf(net, (char __user *) arg);
- rtnl_unlock();
- return ret;
- }
+ if (need_copyout)
+ *need_copyout = true;
if (cmd == SIOCGIFNAME)
- return dev_ifname(net, (struct ifreq __user *)arg);
-
- /*
- * Take care of Wireless Extensions. Unfortunately struct iwreq
- * isn't a proper subset of struct ifreq (it's 8 byte shorter)
- * so we need to treat it specially, otherwise applications may
- * fault if the struct they're passing happens to land at the
- * end of a mapped page.
- */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- struct iwreq iwr;
-
- if (copy_from_user(&iwr, arg, sizeof(iwr)))
- return -EFAULT;
-
- iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+ return dev_ifname(net, ifr);
- return wext_handle_ioctl(net, &iwr, cmd, arg);
- }
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
-
- ifr.ifr_name[IFNAMSIZ-1] = 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
+ dev_load(net, ifr->ifr_name);
+
/*
* See which interface the caller is talking about.
*/
@@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
- dev_load(net, ifr.ifr_name);
rcu_read_lock();
- ret = dev_ifsioc_locked(net, &ifr, cmd);
+ ret = dev_ifsioc_locked(net, ifr, cmd);
rcu_read_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
case SIOCETHTOOL:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ethtool(net, &ifr);
+ ret = dev_ethtool(net, ifr);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFNAME:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
+ if (need_copyout)
+ *need_copyout = false;
return ret;
case SIOCGIFMEM:
@@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
cmd == SIOCGHWTSTAMP ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret && copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
return ret;
}
return -ENOTTY;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 9828616..e38e641 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
if (dev->ifindex != dev_get_iflink(dev))
return true;
- if (dev->priv_flags & IFF_TEAM_PORT)
+ if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
return true;
return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1ccb953..3cad5f5 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -439,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags,
return net;
}
+static void unhash_nsid(struct net *net, struct net *last)
+{
+ struct net *tmp;
+ /* This function is only called from cleanup_net() work,
+ * and this work is the only process, that may delete
+ * a net from net_namespace_list. So, when the below
+ * is executing, the list may only grow. Thus, we do not
+ * use for_each_net_rcu() or rtnl_lock().
+ */
+ for_each_net(tmp) {
+ int id;
+
+ spin_lock_bh(&tmp->nsid_lock);
+ id = __peernet2id(tmp, net);
+ if (id >= 0)
+ idr_remove(&tmp->netns_ids, id);
+ spin_unlock_bh(&tmp->nsid_lock);
+ if (id >= 0)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+ if (tmp == last)
+ break;
+ }
+ spin_lock_bh(&net->nsid_lock);
+ idr_destroy(&net->netns_ids);
+ spin_unlock_bh(&net->nsid_lock);
+}
+
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
- struct net *net, *tmp;
+ struct net *net, *tmp, *last;
struct list_head net_kill_list;
LIST_HEAD(net_exit_list);
@@ -458,26 +485,25 @@ static void cleanup_net(struct work_struct *work)
/* Don't let anyone else find us. */
rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ list_for_each_entry(net, &net_kill_list, cleanup_list)
list_del_rcu(&net->list);
- list_add_tail(&net->exit_list, &net_exit_list);
- for_each_net(tmp) {
- int id;
-
- spin_lock_bh(&tmp->nsid_lock);
- id = __peernet2id(tmp, net);
- if (id >= 0)
- idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
- if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id);
- }
- spin_lock_bh(&net->nsid_lock);
- idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ /* Cache last net. After we unlock rtnl, no one new net
+ * added to net_namespace_list can assign nsid pointer
+ * to a net from net_kill_list (see peernet2id_alloc()).
+ * So, we skip them in unhash_nsid().
+ *
+ * Note, that unhash_nsid() does not delete nsid links
+ * between net_kill_list's nets, as they've already
+ * deleted from net_namespace_list. But, this would be
+ * useless anyway, as netns_ids are destroyed there.
+ */
+ last = list_last_entry(&net_namespace_list, struct net, list);
+ rtnl_unlock();
+ list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ unhash_nsid(net, last);
+ list_add_tail(&net->exit_list, &net_exit_list);
}
- rtnl_unlock();
/*
* Another CPU might be rcu-iterating the list, wait for it.
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4fcfcb1..b8ab5c8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -184,25 +184,44 @@
#define func_enter() pr_debug("entering %s\n", __func__);
+#define PKT_FLAGS \
+ pf(IPV6) /* Interface in IPV6 Mode */ \
+ pf(IPSRC_RND) /* IP-Src Random */ \
+ pf(IPDST_RND) /* IP-Dst Random */ \
+ pf(TXSIZE_RND) /* Transmit size is random */ \
+ pf(UDPSRC_RND) /* UDP-Src Random */ \
+ pf(UDPDST_RND) /* UDP-Dst Random */ \
+ pf(UDPCSUM) /* Include UDP checksum */ \
+ pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \
+ pf(MPLS_RND) /* Random MPLS labels */ \
+ pf(QUEUE_MAP_RND) /* queue map Random */ \
+ pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \
+ pf(FLOW_SEQ) /* Sequential flows */ \
+ pf(IPSEC) /* ipsec on for flows */ \
+ pf(MACSRC_RND) /* MAC-Src Random */ \
+ pf(MACDST_RND) /* MAC-Dst Random */ \
+ pf(VID_RND) /* Random VLAN ID */ \
+ pf(SVID_RND) /* Random SVLAN ID */ \
+ pf(NODE) /* Node memory alloc*/ \
+
+#define pf(flag) flag##_SHIFT,
+enum pkt_flags {
+ PKT_FLAGS
+};
+#undef pf
+
/* Device flag bits */
-#define F_IPSRC_RND (1<<0) /* IP-Src Random */
-#define F_IPDST_RND (1<<1) /* IP-Dst Random */
-#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */
-#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */
-#define F_MACSRC_RND (1<<4) /* MAC-Src Random */
-#define F_MACDST_RND (1<<5) /* MAC-Dst Random */
-#define F_TXSIZE_RND (1<<6) /* Transmit size is random */
-#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */
-#define F_MPLS_RND (1<<8) /* Random MPLS labels */
-#define F_VID_RND (1<<9) /* Random VLAN ID */
-#define F_SVID_RND (1<<10) /* Random SVLAN ID */
-#define F_FLOW_SEQ (1<<11) /* Sequential flows */
-#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
-#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
-#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
-#define F_NODE (1<<15) /* Node memory alloc*/
-#define F_UDPCSUM (1<<16) /* Include UDP checksum */
-#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */
+#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT);
+PKT_FLAGS
+#undef pf
+
+#define pf(flag) __stringify(flag),
+static char *pkt_flag_names[] = {
+ PKT_FLAGS
+};
+#undef pf
+
+#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names)
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -534,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
{
const struct pktgen_dev *pkt_dev = seq->private;
ktime_t stopped;
+ unsigned int i;
u64 idle;
seq_printf(seq,
@@ -595,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
if (pkt_dev->nr_labels) {
- unsigned int i;
seq_puts(seq, " mpls: ");
for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
@@ -631,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " Flags: ");
- if (pkt_dev->flags & F_IPV6)
- seq_puts(seq, "IPV6 ");
-
- if (pkt_dev->flags & F_IPSRC_RND)
- seq_puts(seq, "IPSRC_RND ");
-
- if (pkt_dev->flags & F_IPDST_RND)
- seq_puts(seq, "IPDST_RND ");
-
- if (pkt_dev->flags & F_TXSIZE_RND)
- seq_puts(seq, "TXSIZE_RND ");
-
- if (pkt_dev->flags & F_UDPSRC_RND)
- seq_puts(seq, "UDPSRC_RND ");
-
- if (pkt_dev->flags & F_UDPDST_RND)
- seq_puts(seq, "UDPDST_RND ");
-
- if (pkt_dev->flags & F_UDPCSUM)
- seq_puts(seq, "UDPCSUM ");
-
- if (pkt_dev->flags & F_NO_TIMESTAMP)
- seq_puts(seq, "NO_TIMESTAMP ");
-
- if (pkt_dev->flags & F_MPLS_RND)
- seq_puts(seq, "MPLS_RND ");
-
- if (pkt_dev->flags & F_QUEUE_MAP_RND)
- seq_puts(seq, "QUEUE_MAP_RND ");
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (i == F_FLOW_SEQ)
+ if (!pkt_dev->cflows)
+ continue;
- if (pkt_dev->flags & F_QUEUE_MAP_CPU)
- seq_puts(seq, "QUEUE_MAP_CPU ");
-
- if (pkt_dev->cflows) {
- if (pkt_dev->flags & F_FLOW_SEQ)
- seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
- else
- seq_puts(seq, "FLOW_RND ");
- }
+ if (pkt_dev->flags & (1 << i))
+ seq_printf(seq, "%s ", pkt_flag_names[i]);
+ else if (i == F_FLOW_SEQ)
+ seq_puts(seq, "FLOW_RND ");
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON) {
- seq_puts(seq, "IPSEC ");
- if (pkt_dev->spi)
+ if (i == F_IPSEC && pkt_dev->spi)
seq_printf(seq, "spi:%u", pkt_dev->spi);
- }
#endif
-
- if (pkt_dev->flags & F_MACSRC_RND)
- seq_puts(seq, "MACSRC_RND ");
-
- if (pkt_dev->flags & F_MACDST_RND)
- seq_puts(seq, "MACDST_RND ");
-
- if (pkt_dev->flags & F_VID_RND)
- seq_puts(seq, "VID_RND ");
-
- if (pkt_dev->flags & F_SVID_RND)
- seq_puts(seq, "SVID_RND ");
-
- if (pkt_dev->flags & F_NODE)
- seq_puts(seq, "NODE_ALLOC ");
+ }
seq_puts(seq, "\n");
@@ -858,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
return i;
}
+static __u32 pktgen_read_flag(const char *f, bool *disable)
+{
+ __u32 i;
+
+ if (f[0] == '!') {
+ *disable = true;
+ f++;
+ }
+
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT)
+ continue;
+
+ /* allow only disabling ipv6 flag */
+ if (!*disable && i == IPV6_SHIFT)
+ continue;
+
+ if (strcmp(f, pkt_flag_names[i]) == 0)
+ return 1 << i;
+ }
+
+ if (strcmp(f, "FLOW_RND") == 0) {
+ *disable = !*disable;
+ return F_FLOW_SEQ;
+ }
+
+ return 0;
+}
+
static ssize_t pktgen_if_write(struct file *file,
const char __user * user_buffer, size_t count,
loff_t * offset)
@@ -1215,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
if (!strcmp(name, "flag")) {
+ __u32 flag;
char f[32];
+ bool disable = false;
+
memset(f, 0, 32);
len = strn_len(&user_buffer[i], sizeof(f) - 1);
if (len < 0)
@@ -1224,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file,
if (copy_from_user(f, &user_buffer[i], len))
return -EFAULT;
i += len;
- if (strcmp(f, "IPSRC_RND") == 0)
- pkt_dev->flags |= F_IPSRC_RND;
-
- else if (strcmp(f, "!IPSRC_RND") == 0)
- pkt_dev->flags &= ~F_IPSRC_RND;
-
- else if (strcmp(f, "TXSIZE_RND") == 0)
- pkt_dev->flags |= F_TXSIZE_RND;
-
- else if (strcmp(f, "!TXSIZE_RND") == 0)
- pkt_dev->flags &= ~F_TXSIZE_RND;
-
- else if (strcmp(f, "IPDST_RND") == 0)
- pkt_dev->flags |= F_IPDST_RND;
-
- else if (strcmp(f, "!IPDST_RND") == 0)
- pkt_dev->flags &= ~F_IPDST_RND;
-
- else if (strcmp(f, "UDPSRC_RND") == 0)
- pkt_dev->flags |= F_UDPSRC_RND;
-
- else if (strcmp(f, "!UDPSRC_RND") == 0)
- pkt_dev->flags &= ~F_UDPSRC_RND;
-
- else if (strcmp(f, "UDPDST_RND") == 0)
- pkt_dev->flags |= F_UDPDST_RND;
-
- else if (strcmp(f, "!UDPDST_RND") == 0)
- pkt_dev->flags &= ~F_UDPDST_RND;
-
- else if (strcmp(f, "MACSRC_RND") == 0)
- pkt_dev->flags |= F_MACSRC_RND;
-
- else if (strcmp(f, "!MACSRC_RND") == 0)
- pkt_dev->flags &= ~F_MACSRC_RND;
-
- else if (strcmp(f, "MACDST_RND") == 0)
- pkt_dev->flags |= F_MACDST_RND;
-
- else if (strcmp(f, "!MACDST_RND") == 0)
- pkt_dev->flags &= ~F_MACDST_RND;
-
- else if (strcmp(f, "MPLS_RND") == 0)
- pkt_dev->flags |= F_MPLS_RND;
-
- else if (strcmp(f, "!MPLS_RND") == 0)
- pkt_dev->flags &= ~F_MPLS_RND;
- else if (strcmp(f, "VID_RND") == 0)
- pkt_dev->flags |= F_VID_RND;
+ flag = pktgen_read_flag(f, &disable);
- else if (strcmp(f, "!VID_RND") == 0)
- pkt_dev->flags &= ~F_VID_RND;
-
- else if (strcmp(f, "SVID_RND") == 0)
- pkt_dev->flags |= F_SVID_RND;
-
- else if (strcmp(f, "!SVID_RND") == 0)
- pkt_dev->flags &= ~F_SVID_RND;
-
- else if (strcmp(f, "FLOW_SEQ") == 0)
- pkt_dev->flags |= F_FLOW_SEQ;
-
- else if (strcmp(f, "QUEUE_MAP_RND") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "QUEUE_MAP_CPU") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_CPU;
-
- else if (strcmp(f, "!QUEUE_MAP_CPU") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
-#ifdef CONFIG_XFRM
- else if (strcmp(f, "IPSEC") == 0)
- pkt_dev->flags |= F_IPSEC_ON;
-#endif
-
- else if (strcmp(f, "!IPV6") == 0)
- pkt_dev->flags &= ~F_IPV6;
-
- else if (strcmp(f, "NODE_ALLOC") == 0)
- pkt_dev->flags |= F_NODE;
-
- else if (strcmp(f, "!NODE_ALLOC") == 0)
- pkt_dev->flags &= ~F_NODE;
-
- else if (strcmp(f, "UDPCSUM") == 0)
- pkt_dev->flags |= F_UDPCSUM;
-
- else if (strcmp(f, "!UDPCSUM") == 0)
- pkt_dev->flags &= ~F_UDPCSUM;
-
- else if (strcmp(f, "NO_TIMESTAMP") == 0)
- pkt_dev->flags |= F_NO_TIMESTAMP;
-
- else if (strcmp(f, "!NO_TIMESTAMP") == 0)
- pkt_dev->flags &= ~F_NO_TIMESTAMP;
-
- else {
+ if (flag) {
+ if (disable)
+ pkt_dev->flags &= ~flag;
+ else
+ pkt_dev->flags |= flag;
+ } else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
@@ -2541,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->flows[flow].cur_daddr =
pkt_dev->cur_daddr;
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON)
+ if (pkt_dev->flags & F_IPSEC)
get_ipsec_sa(pkt_dev, flow);
#endif
pkt_dev->nflows++;
@@ -2646,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
static int process_ipsec(struct pktgen_dev *pkt_dev,
struct sk_buff *skb, __be16 protocol)
{
- if (pkt_dev->flags & F_IPSEC_ON) {
+ if (pkt_dev->flags & F_IPSEC) {
struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
int nhead = 0;
if (x) {
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 21f9bed..adf50fb 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -271,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- err = dsa_port_fixed_link_register_of(dp);
+ err = dsa_port_link_register_of(dp);
if (err) {
- dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+ dev_err(ds->dev, "failed to setup link for port %d.%d\n",
ds->index, dp->index);
return err;
}
-
break;
case DSA_PORT_TYPE_USER:
err = dsa_slave_create(dp);
@@ -301,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- dsa_port_fixed_link_unregister_of(dp);
+ dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
if (dp->slave) {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index cefb0c3..70de789 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -166,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_fixed_link_register_of(struct dsa_port *dp);
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
+int dsa_port_link_register_of(struct dsa_port *dp);
+void dsa_port_link_unregister_of(struct dsa_port *dp);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index aa56d3f..cb54b81 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
+ ret = dsa_port_link_register_of(&ds->ports[port]);
if (ret)
return ret;
}
@@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dsa_port_fixed_link_unregister_of(&ds->ports[port]);
+ dsa_port_link_unregister_of(&ds->ports[port]);
}
if (ds->slave_mii_bus && ds->ops->phy_read)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bb4be26..7acc116 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return 0;
}
-int dsa_port_fixed_link_register_of(struct dsa_port *dp)
+static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+{
+ struct device_node *port_dn = dp->dn;
+ struct device_node *phy_dn;
+ struct dsa_switch *ds = dp->ds;
+ struct phy_device *phydev;
+ int port = dp->index;
+ int err = 0;
+
+ phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
+ if (!phy_dn)
+ return 0;
+
+ phydev = of_phy_find_device(phy_dn);
+ if (!phydev) {
+ err = -EPROBE_DEFER;
+ goto err_put_of;
+ }
+
+ if (enable) {
+ err = genphy_config_init(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_resume(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_read_status(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ } else {
+ err = genphy_suspend(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ }
+
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
+
+ dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
+
+err_put_dev:
+ put_device(&phydev->mdio.dev);
+err_put_of:
+ of_node_put(phy_dn);
+ return err;
+}
+
+static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
{
struct device_node *dn = dp->dn;
struct dsa_switch *ds = dp->ds;
@@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp)
int mode;
int err;
- if (of_phy_is_fixed_link(dn)) {
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
+ err = of_phy_register_fixed_link(dn);
+ if (err) {
+ dev_err(ds->dev,
+ "failed to register the fixed PHY of port %d\n",
+ port);
+ return err;
+ }
- phydev = of_phy_find_device(dn);
+ phydev = of_phy_find_device(dn);
- mode = of_get_phy_mode(dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
+ mode = of_get_phy_mode(dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
- genphy_config_init(phydev);
- genphy_read_status(phydev);
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
- put_device(&phydev->mdio.dev);
- }
+ put_device(&phydev->mdio.dev);
return 0;
}
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
+int dsa_port_link_register_of(struct dsa_port *dp)
{
- struct device_node *dn = dp->dn;
+ if (of_phy_is_fixed_link(dp->dn))
+ return dsa_port_fixed_link_register_of(dp);
+ else
+ return dsa_port_setup_phy_of(dp, true);
+}
- if (of_phy_is_fixed_link(dn))
- of_phy_deregister_fixed_link(dn);
+void dsa_port_link_unregister_of(struct dsa_port *dp)
+{
+ if (of_phy_is_fixed_link(dp->dn))
+ of_phy_deregister_fixed_link(dp->dn);
+ else
+ dsa_port_setup_phy_of(dp, false);
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 54cccdd..c24008d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -872,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct sock *sk = sock->sk;
int err = 0;
struct net *net = sock_net(sk);
+ void __user *p = (void __user *)arg;
+ struct ifreq ifr;
+ struct rtentry rt;
switch (cmd) {
case SIOCGSTAMP:
@@ -882,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
case SIOCADDRT:
case SIOCDELRT:
+ if (copy_from_user(&rt, p, sizeof(struct rtentry)))
+ return -EFAULT;
+ err = ip_rt_ioctl(net, cmd, &rt);
+ break;
case SIOCRTMSG:
- err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ err = -EINVAL;
break;
case SIOCDARP:
case SIOCGARP:
@@ -891,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = arp_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
- case SIOCSIFADDR:
case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
case SIOCGIFDSTADDR:
+ case SIOCGIFPFLAGS:
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
+ if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+ err = -EFAULT;
+ break;
+
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFNETMASK:
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
- case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
- err = devinet_ioctl(net, cmd, (void __user *)arg);
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
break;
default:
if (sk->sk_prot->ioctl)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7a93359..e056c00 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr)
}
-int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
- struct ifreq ifr;
struct sockaddr_in sin_orig;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
struct in_device *in_dev;
struct in_ifaddr **ifap = NULL;
struct in_ifaddr *ifa = NULL;
@@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
int ret = -EFAULT;
int tryaddrmatch = 0;
- /*
- * Fetch the caller's info block into kernel space
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- goto out;
- ifr.ifr_name[IFNAMSIZ - 1] = 0;
+ ifr->ifr_name[IFNAMSIZ - 1] = 0;
/* save original address for comparison */
memcpy(&sin_orig, sin, sizeof(*sin));
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
- dev_load(net, ifr.ifr_name);
+ dev_load(net, ifr->ifr_name);
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
@@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
ret = -ENODEV;
- dev = __dev_get_by_name(net, ifr.ifr_name);
+ dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
goto done;
@@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
This is checked above. */
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
ifa->ifa_local) {
break; /* found */
@@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa) {
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next)
- if (!strcmp(ifr.ifr_name, ifa->ifa_label))
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
}
}
@@ -1056,19 +1049,19 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
sin->sin_addr.s_addr = ifa->ifa_local;
- goto rarok;
+ break;
case SIOCGIFBRDADDR: /* Get the broadcast address */
sin->sin_addr.s_addr = ifa->ifa_broadcast;
- goto rarok;
+ break;
case SIOCGIFDSTADDR: /* Get the destination address */
sin->sin_addr.s_addr = ifa->ifa_address;
- goto rarok;
+ break;
case SIOCGIFNETMASK: /* Get the netmask for the interface */
sin->sin_addr.s_addr = ifa->ifa_mask;
- goto rarok;
+ break;
case SIOCSIFFLAGS:
if (colon) {
@@ -1076,11 +1069,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa)
break;
ret = 0;
- if (!(ifr.ifr_flags & IFF_UP))
+ if (!(ifr->ifr_flags & IFF_UP))
inet_del_ifa(in_dev, ifap, 1);
break;
}
- ret = dev_change_flags(dev, ifr.ifr_flags);
+ ret = dev_change_flags(dev, ifr->ifr_flags);
break;
case SIOCSIFADDR: /* Set interface address (and family) */
@@ -1095,7 +1088,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
break;
INIT_HLIST_NODE(&ifa->hash);
if (colon)
- memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
+ memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
} else {
@@ -1182,28 +1175,27 @@ done:
rtnl_unlock();
out:
return ret;
-rarok:
- rtnl_unlock();
- ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
- goto out;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
+static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
+ if (WARN_ON(size > sizeof(struct ifreq)))
+ goto out;
+
if (!in_dev)
goto out;
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
if (!buf) {
- done += sizeof(ifr);
+ done += size;
continue;
}
- if (len < (int) sizeof(ifr))
+ if (len < size)
break;
memset(&ifr, 0, sizeof(struct ifreq));
strcpy(ifr.ifr_name, ifa->ifa_label);
@@ -1212,13 +1204,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
ifa->ifa_local;
- if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
+ if (copy_to_user(buf + done, &ifr, size)) {
done = -EFAULT;
break;
}
- buf += sizeof(struct ifreq);
- len -= sizeof(struct ifreq);
- done += sizeof(struct ifreq);
+ len -= size;
+ done += size;
}
out:
return done;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 32fbd9b..da5635f 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -118,6 +118,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!xo)
return ERR_PTR(-EINVAL);
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
+
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 08259d0..f05afaf 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
* Handle IP routing ioctl calls.
* These are used to manipulate the routing tables
*/
-int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
{
struct fib_config cfg;
- struct rtentry rt;
int err;
switch (cmd) {
@@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&rt, arg, sizeof(rt)))
- return -EFAULT;
-
rtnl_lock();
- err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
+ err = rtentry_to_fib_config(net, cmd, rt, &cfg);
if (err == 0) {
struct fib_table *tb;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 02f00be..10f7f74 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev,
return htonl(INADDR_ANY);
for_ifa(in_dev) {
- if (inet_ifa_match(fl4->saddr, ifa))
+ if (fl4->saddr == ifa->ifa_local)
return fl4->saddr;
} endfor_ifa(in_dev);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b61f228..6ec670f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -114,7 +114,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index,
+ u32 id, u32 index,
bool truncate, bool is_ipv4);
static unsigned int ipgre_net_id __read_mostly;
@@ -273,12 +273,12 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
iph = ip_hdr(skb);
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
+ ver = ershdr->ver;
/* The original GRE header does not have key field,
* Use ERSPAN 10-bit session ID as key.
*/
- tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
@@ -324,14 +324,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (ver == 1) {
tunnel->index = ntohl(pkt_md->u.index);
} else {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(pkt_md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
- tunnel->dir = dir;
- tunnel->hwid = hwid;
+ tunnel->dir = pkt_md->u.md2.dir;
+ tunnel->hwid = get_hwid(&pkt_md->u.md2);
}
}
@@ -615,19 +609,14 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
}
if (version == 1) {
- erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
ntohl(md->u.index), truncate, true);
} else if (version == 2) {
- u16 md2_flags;
- u8 direction;
- u16 hwid;
-
- md2_flags = ntohs(md->u.md2.flags);
- direction = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
-
- erspan_build_header_v2(skb, tunnel_id_to_key32(key->tun_id),
- direction, hwid, truncate, true);
+ erspan_build_header_v2(skb,
+ ntohl(tunnel_id_to_key32(key->tun_id)),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, true);
} else {
goto err_free_rt;
}
@@ -733,10 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
/* Push ERSPAN header */
if (tunnel->erspan_ver == 1)
- erspan_build_header(skb, tunnel->parms.o_key, tunnel->index,
+ erspan_build_header(skb, ntohl(tunnel->parms.o_key),
+ tunnel->index,
truncate, true);
else
- erspan_build_header_v2(skb, tunnel->parms.o_key,
+ erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
tunnel->dir, tunnel->hwid,
truncate, true);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 60fb1eb..6cc70fa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
{
struct net_device *dev = NULL;
int ifindex;
+ int midx;
if (optlen != sizeof(int))
goto e_inval;
@@ -823,10 +824,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EADDRNOTAVAIL;
if (!dev)
break;
+
+ midx = l3mdev_master_ifindex(dev);
dev_put(dev);
err = -EINVAL;
- if (sk->sk_bound_dev_if)
+ if (sk->sk_bound_dev_if &&
+ (!midx || midx != sk->sk_bound_dev_if))
break;
inet->uc_index = ifindex;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5ddb1cb..141f5e8 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -711,9 +711,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ if (tunnel->fwmark) {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
+ }
+ else {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ skb->mark);
+ }
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e9e488e..f75802a 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
sin->sin_port = port;
}
-static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
/*
* Set up interface addresses and routes.
*/
@@ -375,19 +342,19 @@ static int __init ic_setup_if(void)
memset(&ir, 0, sizeof(ir));
strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
set_sockaddr(sin, ic_myaddr, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface address (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface netmask (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface broadcast address (%d)\n",
err);
return -1;
@@ -397,11 +364,11 @@ static int __init ic_setup_if(void)
* out, we'll try to muddle along.
*/
if (ic_dev_mtu != 0) {
- strcpy(ir.ifr_name, ic_dev->dev->name);
- ir.ifr_mtu = ic_dev_mtu;
- if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
+ rtnl_lock();
+ if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0)
pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
ic_dev_mtu, err);
+ rtnl_unlock();
}
return 0;
}
@@ -423,7 +390,7 @@ static int __init ic_setup_routes(void)
set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0);
set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
rm.rt_flags = RTF_UP | RTF_GATEWAY;
- if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) {
+ if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) {
pr_err("IP-Config: Cannot add default route (%d)\n",
err);
return -1;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 136544b..7c50969 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -617,8 +617,21 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast and
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b6a2aa1..4d58e2c 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8533215..3f018f3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -977,8 +977,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!saddr)
saddr = inet->mc_addr;
connected = 0;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast and
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
if (connected)
rt = (struct rtable *)sk_dst_check(sk, 0);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 01801b7..ea6e6e7 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 8affc6d..63faeee 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 44d109c..3fd1ec7 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -145,6 +145,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!xo)
return ERR_PTR(-EINVAL);
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
+
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a884801..05f070e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -513,8 +513,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
- ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
- tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ ver = ershdr->ver;
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip6gre_tunnel_lookup(skb->dev,
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
@@ -565,14 +565,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
if (ver == 1) {
tunnel->parms.index = ntohl(pkt_md->u.index);
} else {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(pkt_md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
- tunnel->parms.dir = dir;
- tunnel->parms.hwid = hwid;
+ tunnel->parms.dir = pkt_md->u.md2.dir;
+ tunnel->parms.hwid = get_hwid(&pkt_md->u.md2);
}
ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
@@ -925,6 +919,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct erspan_metadata *md;
+ __be32 tun_id;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info ||
@@ -944,23 +939,18 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (!md)
goto tx_err;
+ tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
erspan_build_header(skb,
- tunnel_id_to_key32(key->tun_id),
+ ntohl(tun_id),
ntohl(md->u.index), truncate,
false);
} else if (md->version == 2) {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
-
erspan_build_header_v2(skb,
- tunnel_id_to_key32(key->tun_id),
- dir, hwid, truncate,
- false);
+ ntohl(tun_id),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, false);
}
} else {
switch (skb->protocol) {
@@ -982,11 +972,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
if (t->parms.erspan_ver == 1)
- erspan_build_header(skb, t->parms.o_key,
+ erspan_build_header(skb, ntohl(t->parms.o_key),
t->parms.index,
truncate, false);
else
- erspan_build_header_v2(skb, t->parms.o_key,
+ erspan_build_header_v2(skb, ntohl(t->parms.o_key),
t->parms.dir,
t->parms.hwid,
truncate, false);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a4a9445..997c7f1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -174,7 +174,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
-static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
if (!np->autoflowlabel_set)
return ip6_default_np_autolabel(net);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2d4680e..e8ffb5b 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1336,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_AUTOFLOWLABEL:
- val = np->autoflowlabel;
+ val = ip6_autoflowlabel(sock_net(sk), np);
break;
case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f85da2f..fe3966a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2440,7 +2440,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
- const struct in6_addr *gw_addr)
+ const struct in6_addr *gw_addr,
+ u32 tbid, int flags)
{
struct flowi6 fl6 = {
.flowi6_oif = cfg->fc_ifindex,
@@ -2449,15 +2450,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
};
struct fib6_table *table;
struct rt6_info *rt;
- int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
- table = fib6_get_table(net, cfg->fc_table);
+ table = fib6_get_table(net, tbid);
if (!table)
return NULL;
if (!ipv6_addr_any(&cfg->fc_prefsrc))
flags |= RT6_LOOKUP_F_HAS_SADDR;
+ flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
/* if table lookup failed, fall back to full lookup */
@@ -2469,6 +2470,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
+static int ip6_route_check_nh_onlink(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct rt6_info *grt;
+ int err;
+
+ err = 0;
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
+ if (grt) {
+ if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ err = -EINVAL;
+ }
+
+ ip6_rt_put(grt);
+ }
+
+ return err;
+}
+
+static int ip6_route_check_nh(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device **_dev,
+ struct inet6_dev **idev)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ struct net_device *dev = _dev ? *_dev : NULL;
+ struct rt6_info *grt = NULL;
+ int err = -EHOSTUNREACH;
+
+ if (cfg->fc_table) {
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr,
+ cfg->fc_table, flags);
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+
+ if (!grt)
+ goto out;
+
+ if (dev) {
+ if (dev != grt->dst.dev) {
+ ip6_rt_put(grt);
+ goto out;
+ }
+ } else {
+ *_dev = dev = grt->dst.dev;
+ *idev = grt->rt6i_idev;
+ dev_hold(dev);
+ in6_dev_hold(grt->rt6i_idev);
+ }
+
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
+ err = 0;
+
+ ip6_rt_put(grt);
+
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2520,6 +2597,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device required for onlink");
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+ }
+
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
!(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2664,8 +2756,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt = NULL;
-
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
Otherwise, router will not able to send redirects.
@@ -2682,40 +2772,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
- if (cfg->fc_table) {
- grt = ip6_nh_lookup_table(net, cfg, gw_addr);
-
- if (grt) {
- if (grt->rt6i_flags & RTF_GATEWAY ||
- (dev && dev != grt->dst.dev)) {
- ip6_rt_put(grt);
- grt = NULL;
- }
- }
- }
-
- if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL,
- cfg->fc_ifindex, 1);
-
- err = -EHOSTUNREACH;
- if (!grt)
- goto out;
- if (dev) {
- if (dev != grt->dst.dev) {
- ip6_rt_put(grt);
- goto out;
- }
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ err = ip6_route_check_nh_onlink(net, cfg, dev,
+ extack);
} else {
- dev = grt->dst.dev;
- idev = grt->rt6i_idev;
- dev_hold(dev);
- in6_dev_hold(grt->rt6i_idev);
+ err = ip6_route_check_nh(net, cfg, &dev, &idev);
}
- if (!(grt->rt6i_flags & RTF_GATEWAY))
- err = 0;
- ip6_rt_put(grt);
-
if (err)
goto out;
}
@@ -2734,6 +2796,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!dev)
goto out;
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -2751,6 +2819,7 @@ install_route:
if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
!netif_carrier_ok(dev))
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
@@ -3820,6 +3889,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_flags & RTM_F_CLONED)
cfg->fc_flags |= RTF_CACHE;
+ cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -4225,6 +4296,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c92..278e49c 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
{
struct tcphdr *th;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a0f89ad..2a04dc9 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4e12859..bb935a3 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d4e98f2..4a8d407 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
if (!csk)
return -EINVAL;
- /* We must prevent loops or risk deadlock ! */
- if (csk->sk_family == PF_KCM)
+ /* Only allow TCP sockets to be attached for now */
+ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
+ csk->sk_protocol != IPPROTO_TCP)
+ return -EOPNOTSUPP;
+
+ /* Don't allow listeners or closed sockets */
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
return -EOPNOTSUPP;
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
@@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
return err;
}
- sock_hold(csk);
-
write_lock_bh(&csk->sk_callback_lock);
+
+ /* Check if sk_user_data is aready by KCM or someone else.
+ * Must be done under lock to prevent race conditions.
+ */
+ if (csk->sk_user_data) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ strp_done(&psock->strp);
+ kmem_cache_free(kcm_psockp, psock);
+ return -EALREADY;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
csk->sk_data_ready = psock_data_ready;
csk->sk_write_space = psock_write_space;
csk->sk_state_change = psock_state_change;
+
write_unlock_bh(&csk->sk_callback_lock);
+ sock_hold(csk);
+
/* Finished initialization, now add the psock to the MUX. */
spin_lock_bh(&mux->lock);
head = &mux->psocks;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index eb55f1b..7322aa1 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,6 +49,7 @@
#include <net/mpls.h>
#include <net/vxlan.h>
#include <net/tun_proto.h>
+#include <net/erspan.h>
#include "flow_netlink.h"
@@ -329,7 +330,8 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
- /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+ /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
+ * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
@@ -400,6 +402,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE },
};
static const struct ovs_len_tbl
@@ -631,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
+static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+
+ BUILD_BUG_ON(sizeof(struct erspan_metadata) >
+ sizeof(match->key->tun_opts));
+
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
+ nla_len(a), sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (!is_mask)
+ SW_FLOW_KEY_PUT(match, tun_opts_len,
+ sizeof(struct erspan_metadata), false);
+ else
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+ opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+ nla_len(a), is_mask);
+ return 0;
+}
+
static int ip_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -738,6 +768,20 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
+ err = erspan_tun_opt_from_nlattr(a, match, is_mask,
+ log);
+ if (err)
+ return err;
+
+ tun_flags |= TUNNEL_ERSPAN_OPT;
+ opts_type = type;
+ break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
@@ -862,6 +906,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
+ else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ swkey_tun_opts_len, tun_opts))
+ return -EMSGSIZE;
}
return 0;
@@ -2486,6 +2534,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ break;
}
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2e554ef..9920d2f 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock)
sizeof(val));
}
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
- return tcp_sk(tc->t_sock->sk)->snd_nxt;
+ /* seq# of the last byte of data in tcp send buffer */
+ return tcp_sk(tc->t_sock->sk)->write_seq;
}
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index e7858ee..c6fa080 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -55,7 +55,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 73c7476..16f6574 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
* m_ack_seq is set to the sequence number of the last byte of
* header and data. see rds_tcp_is_acked().
*/
- tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc);
+ tc->t_last_sent_nxt = rds_tcp_write_seq(tc);
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
@@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
- rm, rds_tcp_snd_nxt(tc),
+ rm, rds_tcp_write_seq(tc),
(unsigned long long)rm->m_ack_seq);
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index af4b8ec..b7ba9b0 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
int bind)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
+ struct tcf_csum_params *params_old, *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
struct tcf_csum *p;
@@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
- &act_csum_ops, bind, false);
+ &act_csum_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
p = to_tcf_csum(*a);
- spin_lock_bh(&p->tcf_lock);
- p->tcf_action = parm->action;
- p->update_flags = parm->update_flags;
- spin_unlock_bh(&p->tcf_lock);
+ ASSERT_RTNL();
+
+ params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+ if (unlikely(!params_new)) {
+ if (ret == ACT_P_CREATED)
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
+ params_old = rtnl_dereference(p->params);
+
+ params_new->action = parm->action;
+ params_new->update_flags = parm->update_flags;
+ rcu_assign_pointer(p->params, params_new);
+ if (params_old)
+ kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
- int action;
+ struct tcf_csum_params *params;
u32 update_flags;
+ int action;
+
+ rcu_read_lock();
+ params = rcu_dereference(p->params);
- spin_lock(&p->tcf_lock);
tcf_lastuse_update(&p->tcf_tm);
- bstats_update(&p->tcf_bstats, skb);
- action = p->tcf_action;
- update_flags = p->update_flags;
- spin_unlock(&p->tcf_lock);
+ bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
+ action = params->action;
if (unlikely(action == TC_ACT_SHOT))
- goto drop;
+ goto drop_stats;
+ update_flags = params->update_flags;
switch (tc_skb_protocol(skb)) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
@@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
break;
}
+unlock:
+ rcu_read_unlock();
return action;
drop:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.drops++;
- spin_unlock(&p->tcf_lock);
- return TC_ACT_SHOT;
+ action = TC_ACT_SHOT;
+
+drop_stats:
+ qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+ goto unlock;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
struct tc_csum opt = {
- .update_flags = p->update_flags,
.index = p->tcf_index,
- .action = p->tcf_action,
.refcnt = p->tcf_refcnt - ref,
.bindcnt = p->tcf_bindcnt - bind,
};
struct tcf_t t;
+ params = rtnl_dereference(p->params);
+ opt.action = params->action;
+ opt.update_flags = params->update_flags;
+
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -600,6 +620,15 @@ nla_put_failure:
return -1;
}
+static void tcf_csum_cleanup(struct tc_action *a)
+{
+ struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
+
+ params = rcu_dereference_protected(p->params, 1);
+ kfree_rcu(params, rcu);
+}
+
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
@@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = {
.act = tcf_csum,
.dump = tcf_csum_dump,
.init = tcf_csum_init,
+ .cleanup = tcf_csum_cleanup,
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
.size = sizeof(struct tcf_csum),
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f5d2934..bcb4ccb 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -172,9 +172,10 @@ errout:
return ERR_PTR(err);
}
-static void tcf_proto_destroy(struct tcf_proto *tp)
+static void tcf_proto_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
- tp->ops->destroy(tp);
+ tp->ops->destroy(tp, extack);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
}
@@ -223,7 +224,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
tcf_chain_head_change(chain, NULL);
while (tp) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
tp = rtnl_dereference(chain->filter_chain);
tcf_chain_put(chain);
}
@@ -1182,7 +1183,7 @@ replay:
tcf_chain_tp_remove(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_DELTFILTER, false);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
err = 0;
goto errout;
}
@@ -1200,7 +1201,7 @@ replay:
case RTM_NEWTFILTER:
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
@@ -1214,7 +1215,7 @@ replay:
goto errout;
if (last) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
}
goto errout;
case RTM_GETTFILTER:
@@ -1240,7 +1241,7 @@ replay:
RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6088be6..d333f5c 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void basic_destroy(struct tcf_proto *tp)
+static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c11e0fe..8e5326b 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -159,14 +159,14 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
skip_sw = prog && tc_skip_sw(prog->gen_flags);
obj = prog ?: oldprog;
- tc_cls_common_offload_init(&cls_bpf.common, tp, extack);
+ tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags,
+ extack);
cls_bpf.command = TC_CLSBPF_OFFLOAD;
cls_bpf.exts = &obj->exts;
cls_bpf.prog = prog ? prog->filter : NULL;
cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
cls_bpf.name = obj->bpf_name;
cls_bpf.exts_integrated = obj->exts_integrated;
- cls_bpf.gen_flags = obj->gen_flags;
if (oldprog)
tcf_block_offload_dec(block, &oldprog->gen_flags);
@@ -212,11 +212,12 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
- struct cls_bpf_prog *prog)
+ struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
int err;
- err = cls_bpf_offload_cmd(tp, NULL, prog, NULL);
+ err = cls_bpf_offload_cmd(tp, NULL, prog, extack);
if (err)
pr_err("Stopping hardware offload failed: %d\n", err);
}
@@ -227,13 +228,12 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct tcf_block *block = tp->chain->block;
struct tc_cls_bpf_offload cls_bpf = {};
- tc_cls_common_offload_init(&cls_bpf.common, tp, NULL);
+ tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL);
cls_bpf.command = TC_CLSBPF_STATS;
cls_bpf.exts = &prog->exts;
cls_bpf.prog = prog->filter;
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
- cls_bpf.gen_flags = prog->gen_flags;
tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
}
@@ -290,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
tcf_queue_work(&prog->work);
}
-static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, prog->handle);
- cls_bpf_stop_offload(tp, prog);
+ cls_bpf_stop_offload(tp, prog, extack);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
if (tcf_exts_get_net(&prog->exts))
@@ -309,18 +310,19 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- __cls_bpf_delete(tp, arg);
+ __cls_bpf_delete(tp, arg, extack);
*last = list_empty(&head->plist);
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static void cls_bpf_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
list_for_each_entry_safe(prog, tmp, &head->plist, link)
- __cls_bpf_delete(tp, prog);
+ __cls_bpf_delete(tp, prog, extack);
idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 1b54fbf..762da5c 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -143,7 +143,8 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static void cls_cgroup_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 64c24b4..cd5fe38 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -600,7 +600,7 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp)
+static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 727c103..dc9acaa 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -218,12 +218,13 @@ static void fl_destroy_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp, NULL);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
@@ -243,7 +244,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
bool skip_sw = tc_skip_sw(f->flags);
int err;
- tc_cls_common_offload_init(&cls_flower.common, tp, extack);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
cls_flower.dissector = dissector;
@@ -255,7 +256,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
&cls_flower, skip_sw);
if (err < 0) {
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &f->flags);
@@ -272,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp, NULL);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.exts = &f->exts;
@@ -282,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
&cls_flower, false);
}
-static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
if (!tc_skip_hw(f->flags))
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, extack);
tcf_unbind_filter(tp, &f->res);
if (tcf_exts_get_net(&f->exts))
call_rcu(&f->rcu, fl_destroy_filter);
@@ -315,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
schedule_work(&head->work);
}
-static void fl_destroy(struct tcf_proto *tp)
+static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f, *next;
list_for_each_entry_safe(f, next, &head->filters, list)
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
@@ -958,7 +960,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
if (!tc_skip_hw(fold->flags))
- fl_hw_destroy_filter(tp, fold);
+ fl_hw_destroy_filter(tp, fold, NULL);
}
*arg = fnew;
@@ -997,7 +999,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
*last = list_empty(&head->filters);
return 0;
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 94d159a..8b20772 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fw_destroy(struct tcf_proto *tp)
+static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index d990d2a..2ba721a 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -71,12 +71,13 @@ static void mall_destroy_rcu(struct rcu_head *rcu)
static void mall_destroy_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
- unsigned long cookie)
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_mall.common, tp, NULL);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
@@ -94,7 +95,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
bool skip_sw = tc_skip_sw(head->flags);
int err;
- tc_cls_common_offload_init(&cls_mall.common, tp, extack);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
@@ -102,7 +103,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
&cls_mall, skip_sw);
if (err < 0) {
- mall_destroy_hw_filter(tp, head, cookie);
+ mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &head->flags);
@@ -114,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return 0;
}
-static void mall_destroy(struct tcf_proto *tp)
+static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -122,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp)
return;
if (!tc_skip_hw(head->flags))
- mall_destroy_hw_filter(tp, head, (unsigned long) head);
+ mall_destroy_hw_filter(tp, head, (unsigned long) head, extack);
if (tcf_exts_get_net(&head->exts))
call_rcu(&head->rcu, mall_destroy_rcu);
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 55467c3..21a03a8 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void route4_destroy(struct tcf_proto *tp)
+static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 5cc0df6..4f12976 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
__rsvp_delete_filter(f);
}
-static void rsvp_destroy(struct tcf_proto *tp)
+static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 01a163e..b49cc99 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -581,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp)
+static void tcindex_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 7030240..60c892c 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -87,6 +87,7 @@ struct tc_u_hnode {
unsigned int divisor;
struct idr handle_idr;
struct rcu_head rcu;
+ u32 flags;
/* The 'ht' field MUST be the last field in structure to allow for
* more entries allocated at end of structure.
*/
@@ -486,12 +487,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp, NULL);
+ tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -509,7 +511,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
bool offloaded = false;
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp, extack);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_NEW_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -517,7 +519,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_clear_hw_hnode(tp, h);
+ u32_clear_hw_hnode(tp, h, NULL);
return err;
} else if (err > 0) {
offloaded = true;
@@ -529,12 +531,13 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
return 0;
}
-static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
+static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp, NULL);
+ tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_KNODE;
cls_u32.knode.handle = n->handle;
@@ -550,7 +553,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
bool skip_sw = tc_skip_sw(flags);
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp, extack);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_REPLACE_KNODE;
cls_u32.knode.handle = n->handle;
cls_u32.knode.fshift = n->fshift;
@@ -568,7 +571,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_remove_hw_knode(tp, n);
+ u32_remove_hw_knode(tp, n, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &n->flags);
@@ -580,7 +583,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
return 0;
}
-static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_knode *n;
unsigned int h;
@@ -590,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
- u32_remove_hw_knode(tp, n);
+ u32_remove_hw_knode(tp, n, extack);
idr_remove_ext(&ht->handle_idr, n->handle);
if (tcf_exts_get_net(&n->exts))
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
@@ -600,7 +604,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
}
}
-static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode __rcu **hn;
@@ -608,14 +613,14 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
WARN_ON(ht->refcnt);
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
for (phn = rtnl_dereference(*hn);
phn;
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
- u32_clear_hw_hnode(tp, ht);
+ u32_clear_hw_hnode(tp, ht, extack);
idr_destroy(&ht->handle_idr);
idr_remove_ext(&tp_c->handle_idr, ht->handle);
RCU_INIT_POINTER(*hn, ht->next);
@@ -638,7 +643,7 @@ static bool ht_empty(struct tc_u_hnode *ht)
return true;
}
-static void u32_destroy(struct tcf_proto *tp)
+static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -646,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp)
WARN_ON(root_ht == NULL);
if (root_ht && --root_ht->refcnt == 0)
- u32_destroy_hnode(tp, root_ht);
+ u32_destroy_hnode(tp, root_ht, extack);
if (--tp_c->refcnt == 0) {
struct tc_u_hnode *ht;
@@ -657,7 +662,7 @@ static void u32_destroy(struct tcf_proto *tp)
ht;
ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
}
while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
@@ -684,7 +689,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
goto out;
if (TC_U32_KEY(ht->handle)) {
- u32_remove_hw_knode(tp, (struct tc_u_knode *)ht);
+ u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
goto out;
}
@@ -696,7 +701,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
if (ht->refcnt == 1) {
ht->refcnt--;
- u32_destroy_hnode(tp, ht);
+ u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
return -EBUSY;
@@ -1015,6 +1020,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->handle = handle;
ht->prio = tp->prio;
idr_init(&ht->handle_idr);
+ ht->flags = flags;
err = u32_replace_hw_hnode(tp, ht, flags, extack);
if (err) {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index df3110d..07c10ba 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
return 0;
- return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
}
static struct tcf_ematch_ops em_nbyte_ops = {
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 275925b..35bc710 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ goto out;
+
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7ff444e..a40fa53 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4860,9 +4860,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
struct net *net, int *pos, void *p) {
struct rhashtable_iter hti;
struct sctp_transport *tsp;
- int ret = 0;
+ int ret;
again:
+ ret = 0;
sctp_transport_walk_start(&hti);
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index daf8075..267e683 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
- sock_hold(sk);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
- } else {
+ if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *sock)
sock_release(smc->clcsock);
smc->clcsock = NULL;
}
+ if (smc->use_fallback) {
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ }
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
+ if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
out:
return rc;
}
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
- INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
@@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
@@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -513,6 +520,8 @@ out_connected:
return rc ? rc : local_contact;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
decline_rdma:
@@ -526,9 +535,13 @@ decline_rdma:
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
out_err:
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
}
@@ -581,40 +594,33 @@ out_err:
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
- struct sock *sk = &lsmc->sk;
- struct socket *new_clcsock;
+ struct socket *new_clcsock = NULL;
+ struct sock *lsk = &lsmc->sk;
struct sock *new_sk;
int rc;
- release_sock(&lsmc->sk);
- new_sk = smc_sock_alloc(sock_net(sk), NULL);
+ release_sock(lsk);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL);
if (!new_sk) {
rc = -ENOMEM;
- lsmc->sk.sk_err = ENOMEM;
+ lsk->sk_err = ENOMEM;
*new_smc = NULL;
- lock_sock(&lsmc->sk);
+ lock_sock(lsk);
goto out;
}
*new_smc = smc_sk(new_sk);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
- lock_sock(&lsmc->sk);
- if (rc < 0) {
- lsmc->sk.sk_err = -rc;
- new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
- *new_smc = NULL;
- goto out;
- }
- if (lsmc->sk.sk_state == SMC_CLOSED) {
+ lock_sock(lsk);
+ if (rc < 0)
+ lsk->sk_err = -rc;
+ if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
}
@@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
struct smc_sock *par = smc_sk(parent);
- sock_hold(sk);
+ sock_hold(sk); /* sock_put in smc_accept_unlink () */
spin_lock(&par->accept_q_lock);
list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
spin_unlock(&par->accept_q_lock);
@@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk)
list_del_init(&smc_sk(sk)->accept_q);
spin_unlock(&par->accept_q_lock);
sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
- sock_put(sk);
+ sock_put(sk); /* sock_hold in smc_accept_enqueue */
}
/* remove a sock from the accept queue to bind it to a new socket created
@@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ if (isk->clcsock) {
+ sock_release(isk->clcsock);
+ isk->clcsock = NULL;
+ }
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
continue;
}
if (new_sock)
@@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk);
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- } else {
+ if (!smc->use_fallback) {
smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk)
sock_release(tcp);
}
if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ if (sk->sk_state == SMC_CLOSED)
+ smc_conn_free(&smc->conn);
}
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -913,6 +921,8 @@ enqueue:
return;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
@@ -925,8 +935,12 @@ decline_rdma:
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
out_err:
+ if (newsmcsk->sk_state == SMC_INIT)
+ sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
@@ -936,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
{
struct smc_sock *lsmc = container_of(work, struct smc_sock,
tcp_listen_work);
+ struct sock *lsk = &lsmc->sk;
struct smc_sock *new_smc;
int rc = 0;
- lock_sock(&lsmc->sk);
- while (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock(lsk);
+ while (lsk->sk_state == SMC_LISTEN) {
rc = smc_clcsock_accept(lsmc, &new_smc);
if (rc)
goto out;
@@ -949,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work)
new_smc->listen_smc = lsmc;
new_smc->use_fallback = false; /* assume rdma capability first*/
- sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
+ sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- schedule_work(&new_smc->smc_listen_work);
+ sock_hold(&new_smc->sk); /* sock_put in passive closing */
+ if (!schedule_work(&new_smc->smc_listen_work))
+ sock_put(&new_smc->sk);
}
out:
- release_sock(&lsmc->sk);
- lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
+ if (lsmc->clcsock) {
+ sock_release(lsmc->clcsock);
+ lsmc->clcsock = NULL;
+ }
+ release_sock(lsk);
+ /* no more listening, wake up smc_close_wait_listen_clcsock and
+ * accept
+ */
+ lsk->sk_state_change(lsk);
+ sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
static int smc_listen(struct socket *sock, int backlog)
@@ -991,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
- schedule_work(&smc->tcp_listen_work);
+ sock_hold(sk); /* sock_hold in tcp_listen_worker */
+ if (!schedule_work(&smc->tcp_listen_work))
+ sock_put(sk);
out:
release_sock(sk);
@@ -1008,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
int rc = 0;
lsmc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1042,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
out:
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
return rc;
}
@@ -1111,21 +1140,15 @@ out:
static unsigned int smc_accept_poll(struct sock *parent)
{
- struct smc_sock *isk;
- struct sock *sk;
+ struct smc_sock *isk = smc_sk(parent);
+ int mask = 0;
- lock_sock(parent);
- list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)isk;
-
- if (sk->sk_state == SMC_ACTIVE) {
- release_sock(parent);
- return POLLIN | POLLRDNORM;
- }
- }
- release_sock(parent);
+ spin_lock(&isk->accept_q_lock);
+ if (!list_empty(&isk->accept_q))
+ mask = POLLIN | POLLRDNORM;
+ spin_unlock(&isk->accept_q_lock);
- return 0;
+ return mask;
}
static unsigned int smc_poll(struct file *file, struct socket *sock,
@@ -1136,9 +1159,15 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
struct smc_sock *smc;
int rc;
+ if (!sk)
+ return POLLNVAL;
+
smc = smc_sk(sock->sk);
+ sock_hold(sk);
+ lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
+ release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
@@ -1150,37 +1179,43 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= POLLERR;
- else
- /* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ /* success cases including fallback */
+ mask |= POLLOUT | POLLWRNORM;
}
}
- release_sock(sk);
} else {
- sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == SMC_LISTEN)
- /* woken up by sk_data_ready in smc_listen_work() */
- mask |= smc_accept_poll(sk);
+ if (sk->sk_state != SMC_CLOSED) {
+ release_sock(sk);
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ lock_sock(sk);
+ }
if (sk->sk_err)
mask |= POLLERR;
- if (atomic_read(&smc->conn.sndbuf_space) ||
- (sk->sk_shutdown & SEND_SHUTDOWN)) {
- mask |= POLLOUT | POLLWRNORM;
- } else {
- sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
- if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
mask |= POLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ if (sk->sk_state == SMC_LISTEN) {
+ /* woken up by sk_data_ready in smc_listen_work() */
+ mask = smc_accept_poll(sk);
+ } else {
+ if (atomic_read(&smc->conn.sndbuf_space) ||
+ sk->sk_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ if (atomic_read(&smc->conn.bytes_to_rcv))
+ mask |= POLLIN | POLLRDNORM;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ if (sk->sk_state == SMC_APPCLOSEWAIT1)
+ mask |= POLLIN;
+ }
}
+ release_sock(sk);
+ sock_put(sk);
return mask;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 0bee9d1..9518986 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
- struct delayed_work sock_put_work; /* final socket freeing */
bool use_fallback; /* fallback to tcp */
u8 wait_close_tx_prepared : 1;
/* shutdown wr or close
@@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed)
static inline bool using_ipsec(struct smc_sock *smc)
{
return (smc->clcsock->sk->sk_policy[0] ||
- smc->clcsock->sk->sk_policy[1]) ? 1 : 0;
+ smc->clcsock->sk->sk_policy[1]) ? true : false;
}
#else
static inline bool using_ipsec(struct smc_sock *smc)
{
- return 0;
+ return false;
}
#endif
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index d4155ff..3cd086e 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
cdcpend->conn);
}
smc_tx_sndbuf_nonfull(smc);
- if (smc->sk.sk_state != SMC_ACTIVE)
- /* wake up smc_close_wait_tx_pends() */
- smc->sk.sk_state_change(&smc->sk);
bh_unlock_sock(&smc->sk);
}
@@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ int rc;
- return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
- (struct smc_wr_tx_pend_priv **)pend);
+ rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+ (struct smc_wr_tx_pend_priv **)pend);
+ if (!conn->alert_token_local)
+ /* abnormal termination */
+ rc = -EPIPE;
+ return rc;
}
static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
@@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
-bool smc_cdc_tx_has_pending(struct smc_connection *conn)
-{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
- return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
- smc_cdc_tx_filter, (unsigned long)conn);
-}
-
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -218,6 +212,14 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_data_ready(&smc->sk);
}
+ /* piggy backed tx info */
+ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
+ if (diff_cons && smc_tx_prepared_sends(conn)) {
+ smc_tx_sndbuf_nonempty(conn);
+ /* trigger socket release if connection closed */
+ smc_close_wake_tx_prepared(smc);
+ }
+
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
smc->sk.sk_err = ECONNRESET;
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
@@ -227,15 +229,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(&smc->sk, SOCK_DONE);
- schedule_work(&conn->close_work);
- }
-
- /* piggy backed tx info */
- /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn)) {
- smc_tx_sndbuf_nonempty(conn);
- /* trigger socket release if connection closed */
- smc_close_wake_tx_prepared(smc);
+ sock_hold(&smc->sk); /* sock_put in close_work */
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
}
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 149ceda..ab240b3 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
-bool smc_cdc_tx_has_pending(struct smc_connection *conn);
int smc_cdc_init(void) __init;
#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index e194c6c..e339c01 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -19,7 +19,7 @@
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
+#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
static void smc_close_cleanup_listen(struct sock *parent)
{
@@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent)
smc_close_non_accepted(sk);
}
-static void smc_close_wait_tx_pends(struct smc_sock *smc)
+static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = &smc->sk;
signed long timeout;
- timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
+ timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
add_wait_queue(sk_sleep(sk), &wait);
- while (!signal_pending(current) && timeout) {
- int rc;
-
- rc = sk_wait_event(sk, &timeout,
- !smc_cdc_tx_has_pending(&smc->conn),
- &wait);
- if (rc)
+ do {
+ release_sock(sk);
+ if (smc->clcsock)
+ timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
+ timeout);
+ sched_annotate_sleep();
+ lock_sock(sk);
+ if (!smc->clcsock)
break;
- }
+ } while (timeout);
remove_wait_queue(sk_sleep(sk), &wait);
}
@@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn)
}
/* terminate smc socket abnormally - active abort
- * RDMA communication no longer possible
+ * link group is terminated, i.e. RDMA communication no longer possible
*/
static void smc_close_active_abort(struct smc_sock *smc)
{
+ struct sock *sk = &smc->sk;
+
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- smc->sk.sk_err = ECONNABORTED;
+ sk->sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
smc->clcsock->sk->sk_err = ECONNABORTED;
smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
}
- switch (smc->sk.sk_state) {
+ switch (sk->sk_state) {
case SMC_INIT:
case SMC_ACTIVE:
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
+ sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
if (!smc_cdc_rxed_any_close(&smc->conn))
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
else
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (!txflags->peer_conn_closed) {
- smc->sk.sk_state = SMC_PEERABORTWAIT;
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
+ /* just SHUTDOWN_SEND done */
+ sk->sk_state = SMC_PEERABORTWAIT;
} else {
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
}
+ sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
- if (!txflags->peer_conn_closed) {
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
- }
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERFINCLOSEWAIT:
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
break;
}
- sock_set_flag(&smc->sk, SOCK_DEAD);
- smc->sk.sk_state_change(&smc->sk);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_state_change(sk);
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
- if (smc->smc_listen_work.func)
- cancel_work_sync(&smc->smc_listen_work);
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
@@ -200,11 +204,9 @@ again:
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
+ smc_close_wait_listen_clcsock(smc);
}
- release_sock(sk);
smc_close_cleanup_listen(sk);
- cancel_work_sync(&smc->smc_listen_work);
- lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
@@ -214,6 +216,8 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -226,9 +230,10 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
@@ -237,19 +242,21 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
- if (sk->sk_err != ECONNABORTED) {
- /* confirm close from peer */
- rc = smc_close_final(conn);
- if (rc)
- break;
- }
- if (smc_cdc_rxed_any_close(conn))
+ if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
+ sk->sk_state != SMC_APPCLOSEWAIT2)
+ goto again;
+ /* confirm close from peer */
+ rc = smc_close_final(conn);
+ if (rc)
+ break;
+ if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
- else
+ sock_put(sk); /* postponed passive closing */
+ } else {
/* peer has just issued a shutdown write */
sk->sk_state = SMC_PEERFINCLOSEWAIT;
- smc_close_wait_tx_pends(smc);
+ }
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
@@ -257,6 +264,8 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -264,12 +273,8 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- release_sock(sk);
- cancel_delayed_work_sync(&conn->tx_work);
- lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
@@ -278,7 +283,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
@@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
struct sock *sk = &smc->sk;
switch (sk->sk_state) {
+ case SMC_INIT:
case SMC_ACTIVE:
- case SMC_APPFINCLOSEWAIT:
case SMC_APPCLOSEWAIT1:
- case SMC_APPCLOSEWAIT2:
- smc_close_abort(&smc->conn);
+ sk->sk_state = SMC_PROCESSABORT;
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PROCESSABORT;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !smc_close_sent_any_close(&smc->conn)) {
+ !smc_close_sent_any_close(&smc->conn))
/* just shutdown, but not yet closed locally */
- smc_close_abort(&smc->conn);
sk->sk_state = SMC_PROCESSABORT;
- } else {
+ else
sk->sk_state = SMC_CLOSED;
- }
+ sock_put(sk); /* passive closing */
break;
+ case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_INIT:
case SMC_PROCESSABORT:
/* nothing to do, add tracing in future patch */
break;
}
}
-/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
- * or peer_done_writing.
+/* Either some kind of closing has been received: peer_conn_closed,
+ * peer_conn_abort, or peer_done_writing
+ * or the link group of the connection terminates abnormally.
*/
static void smc_close_passive_work(struct work_struct *work)
{
@@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work)
struct sock *sk = &smc->sk;
int old_state;
- lock_sock(&smc->sk);
+ lock_sock(sk);
old_state = sk->sk_state;
if (!conn->alert_token_local) {
@@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work)
goto wakeup;
}
- rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
+ rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
+ /* peer has not received all data */
smc_close_passive_abort_received(smc);
+ release_sock(&smc->sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(&smc->sk);
goto wakeup;
}
switch (sk->sk_state) {
case SMC_INIT:
- if (atomic_read(&smc->conn.bytes_to_rcv) ||
+ if (atomic_read(&conn->bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !smc_cdc_rxed_any_close(conn)))
+ !smc_cdc_rxed_any_close(conn))) {
sk->sk_state = SMC_APPCLOSEWAIT1;
- else
+ } else {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
break;
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
@@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work)
/* fall through */
/* to check for closing */
case SMC_PEERCLOSEWAIT2:
- case SMC_PEERFINCLOSEWAIT:
- if (!smc_cdc_rxed_any_close(&smc->conn))
+ if (!smc_cdc_rxed_any_close(conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
smc_close_sent_any_close(conn)) {
@@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work)
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_APPFINCLOSEWAIT;
}
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_PEERFINCLOSEWAIT:
+ if (smc_cdc_rxed_any_close(conn)) {
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
+ break;
case SMC_APPFINCLOSEWAIT:
case SMC_PEERABORTWAIT:
case SMC_PROCESSABORT:
@@ -393,23 +422,11 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+ smc_conn_free(conn);
}
- release_sock(&smc->sk);
-}
-
-void smc_close_sock_put_work(struct work_struct *work)
-{
- struct smc_sock *smc = container_of(to_delayed_work(work),
- struct smc_sock,
- sock_put_work);
-
- smc->sk.sk_prot->unhash(&smc->sk);
- sock_put(&smc->sk);
+ release_sock(sk);
+ sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_shutdown_write(struct smc_sock *smc)
@@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_ACTIVE)
+ goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (sk->sk_state == SMC_ACTIVE)
- sk->sk_state = SMC_PEERCLOSEWAIT1;
- else
- goto again;
+ if (rc)
+ break;
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
/* passive close */
@@ -446,8 +464,12 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_APPCLOSEWAIT1)
+ goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
@@ -462,7 +484,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 8c49888..19eb6a2 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -21,7 +21,6 @@
void smc_close_wake_tx_prepared(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 94f2111..2424c71 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work)
bool conns;
spin_lock_bh(&smc_lgr_list.lock);
+ if (list_empty(&lgr->list))
+ goto free;
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
@@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
+free:
spin_unlock_bh(&smc_lgr_list.lock);
smc_lgr_free(lgr);
}
@@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn)
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!lgr)
+ if (!conn->lgr)
return;
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
@@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
while (node) {
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk);
+ sock_hold(&smc->sk); /* sock_put in close work */
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
__smc_lgr_unregister_conn(conn);
- schedule_work(&conn->close_work);
- sock_put(&smc->sk);
+ write_unlock_bh(&lgr->conns_lock);
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
+ write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);
+ wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
}
/* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index d2d01cf..427b91c 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout;
- if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
+ smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+ !list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 90f1a7f..2a8957b 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -141,6 +141,17 @@ out:
return rc;
}
+static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *l;
+
+ list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+ if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+ lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+ smc_lgr_terminate(lgr);
+ }
+}
+
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
@@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
+ if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ smc_ib_port_terminate(smcibdev, port_idx + 1);
}
}
@@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_PORT_ERR:
- port_idx = ibevent->element.port_num - 1;
- set_bit(port_idx, &smcibdev->port_event_mask);
- schedule_work(&smcibdev->port_event_work);
- /* fall through */
case IB_EVENT_DEVICE_FATAL:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
- break;
case IB_EVENT_PORT_ACTIVE:
port_idx = ibevent->element.port_num - 1;
set_bit(port_idx, &smcibdev->port_event_mask);
@@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
{
- ib_dealloc_pd(lnk->roce_pd);
+ if (lnk->roce_pd)
+ ib_dealloc_pd(lnk->roce_pd);
lnk->roce_pd = NULL;
}
@@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
+ struct smc_ib_device *smcibdev =
+ (struct smc_ib_device *)ibevent->device;
+ u8 port_idx;
+
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
case IB_EVENT_GID_CHANGE:
case IB_EVENT_PORT_ERR:
case IB_EVENT_QP_ACCESS_ERR:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
+ port_idx = ibevent->element.port_num - 1;
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
break;
default:
break;
@@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
void smc_ib_destroy_queue_pair(struct smc_link *lnk)
{
- ib_destroy_qp(lnk->roce_qp);
+ if (lnk->roce_qp)
+ ib_destroy_qp(lnk->roce_qp);
lnk->roce_qp = NULL;
}
@@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
if (!smcibdev->initialized)
return;
+ smcibdev->initialized = 0;
smc_wr_remove_dev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
ib_destroy_cq(smcibdev->roce_cq_recv);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 2e50fdd..838bce2 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
rc = -EPIPE;
break;
}
- if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ if (smc_cdc_rxed_any_close(conn)) {
rc = -ECONNRESET;
break;
}
@@ -107,7 +107,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
sk_wait_event(sk, &timeo,
sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
+ smc_cdc_rxed_any_close(conn) ||
atomic_read(&conn->sndbuf_space),
&wait);
}
@@ -248,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
- if (rc)
+ if (rc) {
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -406,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
- schedule_delayed_work(&conn->tx_work,
- SMC_TX_WORK_DELAY);
+ if (conn->alert_token_local) /* connection healthy */
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -438,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
+ if (smc->sk.sk_err ||
+ !conn->alert_token_local ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ goto out;
+
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+
+out:
release_sock(&smc->sk);
}
@@ -462,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
+ if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
+ conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index de4537f..1b8af23 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data)
again:
polled++;
do {
+ memset(&wc, 0, sizeof(wc));
rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
if (polled == 1) {
ib_req_notify_cq(dev->roce_cq_send,
@@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
+ u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
u64 wr_id;
- u32 idx;
int rc;
*wr_buf = NULL;
@@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc)
return rc;
} else {
- rc = wait_event_interruptible_timeout(
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ rc = wait_event_timeout(
link->wr_tx_wait,
+ list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- struct smc_link_group *lgr;
-
- lgr = container_of(link, struct smc_link_group,
- lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr);
return -EPIPE;
}
- if (rc == -ERESTARTSYS)
- return -EINTR;
if (idx == link->wr_tx_cnt)
return -EPIPE;
}
@@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);
- if (rc)
+ if (rc) {
+ struct smc_link_group *lgr =
+ container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
smc_wr_tx_put_slot(link, priv);
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
return rc;
}
-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
+void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data)
{
struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
+ struct smc_wr_rx_hdr *wr_tx;
int i;
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
+ wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
+ if (wr_tx->type != wr_tx_hdr_type)
continue;
tx_pend = &link->wr_tx_pends[i].priv;
if (filter(tx_pend, data))
@@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
}
}
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data)
-{
- struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
- int i;
-
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
- continue;
- tx_pend = &link->wr_tx_pends[i].priv;
- if (filter(tx_pend, data))
- return true;
- }
- return false;
-}
-
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2acf12b..ef0c349 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link,
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
diff --git a/net/socket.c b/net/socket.c
index 1536515b..11cc2cd 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -961,9 +961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* If this ioctl is unknown try to hand it down
* to the NIC driver.
*/
- if (err == -ENOIOCTLCMD)
- err = dev_ioctl(net, cmd, argp);
+ if (err != -ENOIOCTLCMD)
+ return err;
+ if (cmd == SIOCGIFCONF) {
+ struct ifconf ifc;
+ if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
+ return -EFAULT;
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
+ rtnl_unlock();
+ if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
+ err = -EFAULT;
+ } else {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
return err;
}
@@ -988,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
sock = file->private_data;
sk = sock->sk;
net = sock_net(sk);
- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
- err = dev_ioctl(net, cmd, argp);
+ if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
} else
#ifdef CONFIG_WEXT_CORE
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- err = dev_ioctl(net, cmd, argp);
+ err = wext_handle_ioctl(net, cmd, argp);
} else
#endif
switch (cmd) {
@@ -2654,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
return err;
}
-static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(struct ifreq));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- err = dev_ioctl(net, SIOCGIFNAME, uifr);
- if (err)
- return err;
-
- if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- return 0;
-}
-
-static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
{
struct compat_ifconf ifc32;
struct ifconf ifc;
- struct ifconf __user *uifc;
- struct compat_ifreq __user *ifr32;
- struct ifreq __user *ifr;
- unsigned int i, j;
int err;
if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
- memset(&ifc, 0, sizeof(ifc));
- if (ifc32.ifcbuf == 0) {
- ifc32.ifc_len = 0;
- ifc.ifc_len = 0;
- ifc.ifc_req = NULL;
- uifc = compat_alloc_user_space(sizeof(struct ifconf));
- } else {
- size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
- sizeof(struct ifreq);
- uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
- ifc.ifc_len = len;
- ifr = ifc.ifc_req = (void __user *)(uifc + 1);
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
- if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr++;
- ifr32++;
- }
- }
- if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
- return -EFAULT;
+ ifc.ifc_len = ifc32.ifc_len;
+ ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
- err = dev_ioctl(net, SIOCGIFCONF, uifc);
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
+ rtnl_unlock();
if (err)
return err;
- if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
- return -EFAULT;
-
- ifr = ifc.ifc_req;
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0, j = 0;
- i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
- i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
- if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr32++;
- ifr++;
- }
-
- if (ifc32.ifcbuf == 0) {
- /* Translate from 64-bit structure multiple to
- * a 32-bit one.
- */
- i = ifc.ifc_len;
- i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
- ifc32.ifc_len = i;
- } else {
- ifc32.ifc_len = i;
- }
+ ifc32.ifc_len = ifc.ifc_len;
if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
@@ -2747,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
{
struct compat_ethtool_rxnfc __user *compat_rxnfc;
bool convert_in = false, convert_out = false;
- size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
- struct ethtool_rxnfc __user *rxnfc;
- struct ifreq __user *ifr;
+ size_t buf_size = 0;
+ struct ethtool_rxnfc __user *rxnfc = NULL;
+ struct ifreq ifr;
u32 rule_cnt = 0, actual_rule_cnt;
u32 ethcmd;
u32 data;
@@ -2786,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
case ETHTOOL_SRXCLSRLDEL:
buf_size += sizeof(struct ethtool_rxnfc);
convert_in = true;
+ rxnfc = compat_alloc_user_space(buf_size);
break;
}
- ifr = compat_alloc_user_space(buf_size);
- rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
-
- if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+ if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (put_user(convert_in ? rxnfc : compat_ptr(data),
- &ifr->ifr_ifru.ifru_data))
- return -EFAULT;
+ ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
if (convert_in) {
/* We expect there to be holes between fs.m_ext and
@@ -2825,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
return -EFAULT;
}
- ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+ ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
if (ret)
return ret;
@@ -2866,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
- void __user *uptr;
compat_uptr_t uptr32;
- struct ifreq __user *uifr;
+ struct ifreq ifr;
+ void __user *saved;
+ int err;
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
+ if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
return -EFAULT;
- uptr = compat_ptr(uptr32);
-
- if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
- return -EFAULT;
-
- return dev_ioctl(net, SIOCWANDEV, uifr);
-}
-
-static int bond_ioctl(struct net *net, unsigned int cmd,
- struct compat_ifreq __user *ifr32)
-{
- struct ifreq kifr;
- mm_segment_t old_fs;
- int err;
-
- switch (cmd) {
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd,
- (struct ifreq __user __force *) &kifr);
- set_fs(old_fs);
+ saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
- return err;
- default:
- return -ENOIOCTLCMD;
+ err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
+ if (!err) {
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
+ if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+ err = -EFAULT;
}
+ return err;
}
/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
struct compat_ifreq __user *u_ifreq32)
{
- struct ifreq __user *u_ifreq64;
- char tmp_buf[IFNAMSIZ];
- void __user *data64;
+ struct ifreq ifreq;
u32 data32;
- if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
- IFNAMSIZ))
- return -EFAULT;
- if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
- return -EFAULT;
- data64 = compat_ptr(data32);
-
- u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
-
- if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
- IFNAMSIZ))
+ if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
+ if (get_user(data32, &u_ifreq32->ifr_data))
return -EFAULT;
+ ifreq.ifr_data = compat_ptr(data32);
- return dev_ioctl(net, cmd, u_ifreq64);
-}
-
-static int dev_ifsioc(struct net *net, struct socket *sock,
- unsigned int cmd, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
-
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
- if (!err) {
- switch (cmd) {
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFPFLAGS:
- case SIOCGIFTXQLEN:
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
- err = -EFAULT;
- break;
- }
- }
- return err;
+ return dev_ioctl(net, cmd, &ifreq, NULL);
}
static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
@@ -2980,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
{
struct ifreq ifr;
struct compat_ifmap __user *uifmap32;
- mm_segment_t old_fs;
int err;
uifmap32 = &uifr32->ifr_ifru.ifru_map;
@@ -2994,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
if (err)
return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
- set_fs(old_fs);
+ err = dev_ioctl(net, cmd, &ifr, NULL);
if (cmd == SIOCGIFMAP && !err) {
err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -3130,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
- case SIOCGIFNAME:
- return dev_ifname32(net, argp);
case SIOCGIFCONF:
- return dev_ifconf(net, argp);
+ return compat_dev_ifconf(net, argp);
case SIOCETHTOOL:
return ethtool_ioctl(net, argp);
case SIOCWANDEV:
@@ -3141,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGIFMAP:
case SIOCSIFMAP:
return compat_sioc_ifmap(net, cmd, argp);
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- return bond_ioctl(net, cmd, argp);
case SIOCADDRT:
case SIOCDELRT:
return routing_ioctl(net, sock, cmd, argp);
@@ -3205,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- return dev_ifsioc(net, sock, cmd, argp);
-
case SIOCSARP:
case SIOCGARP:
case SIOCDARP:
case SIOCATMARK:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDCHANGEACTIVE:
+ case SIOCGIFNAME:
return sock_do_ioctl(net, sock, cmd, arg);
}
@@ -3365,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage_locked);
-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
-{
- mm_segment_t oldfs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = sock->ops->ioctl(sock, cmd, arg);
- set_fs(oldfs);
-
- return err;
-}
-EXPORT_SYMBOL(kernel_sock_ioctl);
-
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
return sock->ops->shutdown(sock, how);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 61f394d..0a9b72f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -577,6 +577,8 @@ alloc_payload:
get_page(page);
sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
sg_set_page(sg, page, copy, offset);
+ sg_unmark_end(sg);
+
ctx->sg_plaintext_num_elem++;
sk_mem_charge(sk, copy);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 6cdb054..9efbfc7 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev,
}
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
- void __user *arg)
+int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct iw_request_info info = { .cmd = cmd, .flags = 0 };
+ struct iwreq iwr;
int ret;
- ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
+ iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+
+ ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
ioctl_standard_call,
ioctl_private_call);
if (ret >= 0 &&
IW_IS_GET(cmd) &&
- copy_to_user(arg, iwr, sizeof(struct iwreq)))
+ copy_to_user(arg, &iwr, sizeof(struct iwreq)))
return -EFAULT;
return ret;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 7598250..8e70291 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -147,8 +147,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (!x->type_offload)
return -EINVAL;
- /* We don't yet support UDP encapsulation, TFC padding and ESN. */
- if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if (x->encap || x->tfcpad)
return -EINVAL;
dev = dev_get_by_index(net, xuo->ifindex);
@@ -178,12 +178,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return 0;
}
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
+ xso->dev = NULL;
+ dev_put(dev);
+ return -EINVAL;
+ }
+
xso->dev = dev;
xso->num_exthdrs = 1;
xso->flags = xuo->flags;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
+ xso->dev = NULL;
dev_put(dev);
return err;
}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 0250181..1d38c6a 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -551,6 +551,8 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
bitnr = replay_esn->replay_window - (diff - pos);
}
+ xfrm_dev_state_advance_esn(x);
+
nr = bitnr >> 5;
bitnr = bitnr & 0x1F;
replay_esn->bmp[nr] |= (1U << bitnr);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 20b1e41..54e21f1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -317,7 +317,7 @@ retry:
if (!type && try_load) {
request_module("xfrm-offload-%d-%d", family, proto);
- try_load = 0;
+ try_load = false;
goto retry;
}
@@ -2279,8 +2279,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
goto error;
}
- x->km.state = XFRM_STATE_VALID;
-
error:
return err;
}
@@ -2289,7 +2287,13 @@ EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
- return __xfrm_init_state(x, true, false);
+ int err;
+
+ err = __xfrm_init_state(x, true, false);
+ if (!err)
+ x->km.state = XFRM_STATE_VALID;
+
+ return err;
}
EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index bdb48e5..7f52b8e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
- if (attrs[XFRMA_OFFLOAD_DEV]) {
- err = xfrm_dev_state_add(net, x,
- nla_data(attrs[XFRMA_OFFLOAD_DEV]));
- if (err)
- goto error;
- }
-
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
goto error;
@@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ /* configure the hardware if offload is requested */
+ if (attrs[XFRMA_OFFLOAD_DEV]) {
+ err = xfrm_dev_state_add(net, x,
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+ if (err)
+ goto error;
+ }
+
return x;
error:
@@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
+ if (x->km.state == XFRM_STATE_VOID)
+ x->km.state = XFRM_STATE_VALID;
+
c.seq = nlh->nlmsg_seq;
c.portid = nlh->nlmsg_pid;
c.event = nlh->nlmsg_type;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 833b9c1..e78aad0 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -26,6 +26,7 @@ import time
logfile = None
log_level = 1
+skip_extack = False
bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
pp = pprint.PrettyPrinter()
devs = [] # devices we created for clean up
@@ -132,7 +133,7 @@ def rm(f):
if f in files:
files.remove(f)
-def tool(name, args, flags, JSON=True, ns="", fail=True):
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
params = ""
if JSON:
params += "%s " % (flags["json"])
@@ -140,9 +141,20 @@ def tool(name, args, flags, JSON=True, ns="", fail=True):
if ns != "":
ns = "ip netns exec %s " % (ns)
- ret, out = cmd(ns + name + " " + params + args, fail=fail)
- if JSON and len(out.strip()) != 0:
- return ret, json.loads(out)
+ if include_stderr:
+ ret, stdout, stderr = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=True)
+ else:
+ ret, stdout = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=False)
+
+ if JSON and len(stdout.strip()) != 0:
+ out = json.loads(stdout)
+ else:
+ out = stdout
+
+ if include_stderr:
+ return ret, out, stderr
else:
return ret, out
@@ -181,13 +193,15 @@ def bpftool_map_list_wait(expected=0, n_retry=20):
time.sleep(0.05)
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
-def ip(args, force=False, JSON=True, ns="", fail=True):
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
if force:
args = "-force " + args
- return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
-def tc(args, JSON=True, ns="", fail=True):
- return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
def ethtool(dev, opt, args, fail=True):
return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -348,13 +362,19 @@ class NetdevSim:
return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
fail=fail)
- def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True):
+ def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
+ fail=True, include_stderr=False):
+ if verbose:
+ bpf += " verbose"
return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
- force=force, JSON=JSON, fail=fail)
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
- def unset_xdp(self, mode, force=False, JSON=True, fail=True):
+ def unset_xdp(self, mode, force=False, JSON=True,
+ fail=True, include_stderr=False):
return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
- force=force, JSON=JSON, fail=fail)
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
def ip_link_show(self, xdp):
_, link = ip("link show dev %s" % (self['ifname']))
@@ -409,17 +429,41 @@ class NetdevSim:
(len(filters), expected))
return filters
- def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False,
- fail=True):
+ def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+ chain=None, cls="", params="",
+ fail=True, include_stderr=False):
+ spec = ""
+ if prio is not None:
+ spec += " prio %d" % (prio)
+ if handle:
+ spec += " handle %s" % (handle)
+ if chain is not None:
+ spec += " chain %d" % (chain)
+
+ return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+ .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+ cls=cls, params=params),
+ fail=fail, include_stderr=include_stderr)
+
+ def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+ chain=None, da=False, verbose=False,
+ skip_sw=False, skip_hw=False,
+ fail=True, include_stderr=False):
+ cls = "bpf " + bpf
+
params = ""
if da:
params += " da"
+ if verbose:
+ params += " verbose"
if skip_sw:
params += " skip_sw"
if skip_hw:
params += " skip_hw"
- return tc("filter add dev %s ingress bpf %s %s" %
- (self['ifname'], bpf, params), fail=fail)
+
+ return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+ chain=chain, params=params,
+ fail=fail, include_stderr=include_stderr)
def set_ethtool_tc_offloads(self, enable, fail=True):
args = "hw-tc-offload %s" % ("on" if enable else "off")
@@ -491,6 +535,39 @@ def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
fail("dev" not in m.keys(), "Device parameters not reported")
fail(dev != m["dev"], "Map's device different than program's")
+def check_extack(output, reference, args):
+ if skip_extack:
+ return
+ lines = output.split("\n")
+ comp = len(lines) >= 2 and lines[1] == reference
+ fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+ check_extack(output, "Error: netdevsim: " + reference, args)
+
+def check_no_extack(res, needle):
+ fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+ "Found '%s' in command output, leaky extack?" % (needle))
+
+def check_verifier_log(output, reference):
+ lines = output.split("\n")
+ for l in reversed(lines):
+ if l == reference:
+ return
+ fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
+def test_spurios_extack(sim, obj, skip_hw, needle):
+ res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+ include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_hw=skip_hw, include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+ include_stderr=True)
+ check_no_extack(res, needle)
+
+
# Parse command line
parser = argparse.ArgumentParser()
parser.add_argument("--log", help="output verbose log to given file")
@@ -527,6 +604,14 @@ for s in samples:
skip(ret != 0, "sample %s/%s not found, please compile it" %
(bpf_test_dir, s))
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+ fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+ print("Warning: no extack message in iproute2 output, libmnl missing?")
+ log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+ skip_extack = True
+
# Check if net namespaces seem to work
ns = mknetns()
skip(ns is None, "Could not create a net namespace")
@@ -558,8 +643,10 @@ try:
sim.tc_flush_filters()
start_test("Test TC offloads are off by default...")
- ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "TC filter loaded without enabling TC offloads")
+ check_extack(err, "Error: TC offload is disabled on net device.", args)
sim.wait_for_flush()
sim.set_ethtool_tc_offloads(True)
@@ -587,13 +674,63 @@ try:
sim.dfs["bpf_tc_non_bound_accept"] = "N"
start_test("Test TC cBPF unbound bytecode doesn't offload...")
- ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "TC bytecode loaded for offload")
+ check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+ args)
sim.wait_for_flush()
+ start_test("Test non-0 chain offload...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+ skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Offloaded a filter to chain other than 0")
+ check_extack(err, "Error: Driver supports only offload of chain 0.", args)
+ sim.tc_flush_filters()
+
+ start_test("Test TC replace...")
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test TC replace bad flags...")
+ for i in range(3):
+ for j in range(3):
+ ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_sw=(j == 1), skip_hw=(j == 2),
+ fail=False)
+ fail(bool(ret) != bool(j),
+ "Software TC incorrect load in replace test, iteration %d" %
+ (j))
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test spurious extack from the driver...")
+ test_spurios_extack(sim, obj, False, "netdevsim")
+ test_spurios_extack(sim, obj, True, "netdevsim")
+
+ sim.set_ethtool_tc_offloads(False)
+
+ test_spurios_extack(sim, obj, False, "TC offload is disabled")
+ test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ sim.tc_flush_filters()
+
start_test("Test TC offloads work...")
- ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret != 0, "TC filter did not load with TC offloads enabled")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test TC offload basics...")
dfs = sim.dfs_get_bound_progs(expected=1)
@@ -669,16 +806,24 @@ try:
"Device parameters reported for non-offloaded program")
start_test("Test XDP prog replace with bad flags...")
- ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
+ ret, _, err = sim.set_xdp(obj, "offload", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Replaced XDP program with a program in different mode")
- ret, _ = sim.set_xdp(obj, "", force=True, fail=False)
+ check_extack_nsim(err, "program loaded with different flags.", args)
+ ret, _, err = sim.set_xdp(obj, "", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Replaced XDP program with a program in different mode")
+ check_extack_nsim(err, "program loaded with different flags.", args)
start_test("Test XDP prog remove with bad flags...")
- ret, _ = sim.unset_xdp("offload", force=True, fail=False)
+ ret, _, err = sim.unset_xdp("offload", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Removed program with a bad mode mode")
- ret, _ = sim.unset_xdp("", force=True, fail=False)
+ check_extack_nsim(err, "program loaded with different flags.", args)
+ ret, _, err = sim.unset_xdp("", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Removed program with a bad mode mode")
+ check_extack_nsim(err, "program loaded with different flags.", args)
start_test("Test MTU restrictions...")
ret, _ = sim.set_mtu(9000, fail=False)
@@ -687,18 +832,20 @@ try:
sim.unset_xdp("drv")
bpftool_prog_list_wait(expected=0)
sim.set_mtu(9000)
- ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+ check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
sim.set_mtu(1500)
sim.wait_for_flush()
start_test("Test XDP offload...")
- sim.set_xdp(obj, "offload")
+ _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
ipl = sim.ip_link_show(xdp=True)
link_xdp = ipl["xdp"]["prog"]
progs = bpftool_prog_list(expected=1)
prog = progs[0]
fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test XDP offload is device bound...")
dfs = sim.dfs_get_bound_progs(expected=1)
@@ -724,25 +871,32 @@ try:
sim2.set_xdp(obj, "offload")
pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
- ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a different device accepted")
+ check_extack_nsim(err, "program bound to different dev.", args)
sim2.remove()
- ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a removed device accepted")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
rm(pin_file)
bpftool_prog_list_wait(expected=0)
start_test("Test mixing of TC and XDP...")
sim.tc_add_ingress()
sim.set_xdp(obj, "offload")
- ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Loading TC when XDP active should fail")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
sim.unset_xdp("offload")
sim.wait_for_flush()
sim.cls_bpf_add_filter(obj, skip_sw=True)
- ret, _ = sim.set_xdp(obj, "offload", fail=False)
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
fail(ret == 0, "Loading XDP when TC active should fail")
+ check_extack_nsim(err, "TC program is already loaded.", args)
start_test("Test binding TC from pinned...")
pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
@@ -765,8 +919,10 @@ try:
start_test("Test asking for TC offload of two filters...")
sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
- ret, _ = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Managed to offload two TC filters at the same time")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
sim.tc_flush_filters(bound=2, total=2)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b4b69c2..9dea963 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1310,7 +1310,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma) && !logging_active) {
+ if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 6231012..743ca5c 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -285,9 +285,11 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
- ret = vgic_v4_init(kvm);
- if (ret)
- goto out;
+ if (vgic_has_its(kvm)) {
+ ret = vgic_v4_init(kvm);
+ if (ret)
+ goto out;
+ }
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 4a37292..bc42651 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -118,7 +118,7 @@ int vgic_v4_init(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i, nr_vcpus, ret;
- if (!vgic_supports_direct_msis(kvm))
+ if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
if (dist->its_vm.vpes)
OpenPOWER on IntegriCloud