summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/6lowpan_i.h1
-rw-r--r--net/6lowpan/Makefile1
-rw-r--r--net/6lowpan/nhc.h1
-rw-r--r--net/802/Makefile1
-rw-r--r--net/8021q/Makefile1
-rw-r--r--net/8021q/vlan.c9
-rw-r--r--net/8021q/vlan.h3
-rw-r--r--net/8021q/vlan_core.c7
-rw-r--r--net/8021q/vlan_netlink.c3
-rw-r--r--net/8021q/vlanproc.h1
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/dev.c1
-rw-r--r--net/appletalk/sysctl_net_atalk.c1
-rw-r--r--net/atm/Makefile1
-rw-r--r--net/atm/addr.c1
-rw-r--r--net/atm/addr.h1
-rw-r--r--net/atm/atm_misc.c1
-rw-r--r--net/atm/atm_sysfs.c1
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/common.h1
-rw-r--r--net/atm/ioctl.c1
-rw-r--r--net/atm/lec.c19
-rw-r--r--net/atm/lec.h1
-rw-r--r--net/atm/lec_arpc.h1
-rw-r--r--net/atm/mpc.c11
-rw-r--r--net/atm/mpc.h1
-rw-r--r--net/atm/mpoa_caches.c1
-rw-r--r--net/atm/mpoa_caches.h1
-rw-r--r--net/atm/mpoa_proc.c1
-rw-r--r--net/atm/proc.c1
-rw-r--r--net/atm/protocols.h1
-rw-r--r--net/atm/pvc.c1
-rw-r--r--net/atm/raw.c1
-rw-r--r--net/atm/resources.c1
-rw-r--r--net/atm/resources.h1
-rw-r--r--net/atm/signaling.c1
-rw-r--r--net/atm/signaling.h1
-rw-r--r--net/atm/svc.c1
-rw-r--r--net/ax25/Makefile1
-rw-r--r--net/ax25/af_ax25.c7
-rw-r--r--net/ax25/ax25_ds_timer.c9
-rw-r--r--net/ax25/ax25_timer.c41
-rw-r--r--net/batman-adv/bat_iv_ogm.c24
-rw-r--r--net/batman-adv/bat_v.c11
-rw-r--r--net/batman-adv/bat_v_elp.c6
-rw-r--r--net/batman-adv/bat_v_ogm.c12
-rw-r--r--net/batman-adv/distributed-arp-table.c4
-rw-r--r--net/batman-adv/gateway_client.c8
-rw-r--r--net/batman-adv/gateway_common.c18
-rw-r--r--net/batman-adv/hard-interface.c14
-rw-r--r--net/batman-adv/icmp_socket.c4
-rw-r--r--net/batman-adv/main.c12
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/originator.c26
-rw-r--r--net/batman-adv/routing.c6
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/soft-interface.c10
-rw-r--r--net/batman-adv/sysfs.c4
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/bluetooth/Kconfig10
-rw-r--r--net/bluetooth/Makefile1
-rw-r--r--net/bluetooth/a2mp.c2
-rw-r--r--net/bluetooth/amp.c4
-rw-r--r--net/bluetooth/ecdh_helper.c228
-rw-r--r--net/bluetooth/ecdh_helper.h9
-rw-r--r--net/bluetooth/hci_conn.c6
-rw-r--r--net/bluetooth/hci_core.c35
-rw-r--r--net/bluetooth/hci_event.c46
-rw-r--r--net/bluetooth/hci_request.c21
-rw-r--r--net/bluetooth/hci_request.h1
-rw-r--r--net/bluetooth/hci_sock.c23
-rw-r--r--net/bluetooth/hci_sysfs.c3
-rw-r--r--net/bluetooth/mgmt.c57
-rw-r--r--net/bluetooth/selftest.c48
-rw-r--r--net/bluetooth/smp.c149
-rw-r--r--net/bridge/Makefile3
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_arp_nd_proxy.c469
-rw-r--r--net/bridge/br_device.c27
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c24
-rw-r--r--net/bridge/br_input.c74
-rw-r--r--net/bridge/br_ioctl.c7
-rw-r--r--net/bridge/br_mdb.c1
-rw-r--r--net/bridge/br_multicast.c123
-rw-r--r--net/bridge/br_netlink.c118
-rw-r--r--net/bridge/br_netlink_tunnel.c14
-rw-r--r--net/bridge/br_private.h28
-rw-r--r--net/bridge/br_private_tunnel.h3
-rw-r--r--net/bridge/br_stp.c6
-rw-r--r--net/bridge/br_stp_if.c4
-rw-r--r--net/bridge/br_stp_timer.c50
-rw-r--r--net/bridge/br_switchdev.c1
-rw-r--r--net/bridge/br_sysfs_if.c4
-rw-r--r--net/bridge/br_vlan.c78
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtable_filter.c4
-rw-r--r--net/bridge/netfilter/ebtable_nat.c4
-rw-r--r--net/bridge/netfilter/ebtables.c17
-rw-r--r--net/caif/Makefile1
-rw-r--r--net/can/Makefile1
-rw-r--r--net/can/af_can.c24
-rw-r--r--net/can/af_can.h2
-rw-r--r--net/can/bcm.c5
-rw-r--r--net/can/proc.c8
-rw-r--r--net/ceph/Makefile1
-rw-r--r--net/ceph/armor.c1
-rw-r--r--net/ceph/auth.c1
-rw-r--r--net/ceph/auth_none.c1
-rw-r--r--net/ceph/auth_none.h1
-rw-r--r--net/ceph/auth_x.c1
-rw-r--r--net/ceph/auth_x.h1
-rw-r--r--net/ceph/auth_x_protocol.h1
-rw-r--r--net/ceph/buffer.c1
-rw-r--r--net/ceph/ceph_fs.c1
-rw-r--r--net/ceph/ceph_strings.c1
-rw-r--r--net/ceph/cls_lock_client.c1
-rw-r--r--net/ceph/crush/crush.c1
-rw-r--r--net/ceph/crush/hash.c1
-rw-r--r--net/ceph/crypto.c1
-rw-r--r--net/ceph/crypto.h1
-rw-r--r--net/ceph/debugfs.c1
-rw-r--r--net/ceph/messenger.c1
-rw-r--r--net/ceph/mon_client.c1
-rw-r--r--net/ceph/msgpool.c1
-rw-r--r--net/ceph/osd_client.c1
-rw-r--r--net/ceph/osdmap.c1
-rw-r--r--net/ceph/pagelist.c1
-rw-r--r--net/ceph/pagevec.c1
-rw-r--r--net/ceph/string_table.c1
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/datagram.c1
-rw-r--r--net/core/dev.c202
-rw-r--r--net/core/dev_ioctl.c14
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/ethtool.c21
-rw-r--r--net/core/fib_rules.c9
-rw-r--r--net/core/filter.c322
-rw-r--r--net/core/gro_cells.c1
-rw-r--r--net/core/net-procfs.c1
-rw-r--r--net/core/net-sysfs.h1
-rw-r--r--net/core/net-traces.c4
-rw-r--r--net/core/net_namespace.c1
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c335
-rw-r--r--net/core/skbuff.c50
-rw-r--r--net/core/sock.c17
-rw-r--r--net/core/sock_reuseport.c13
-rw-r--r--net/core/stream.c1
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/core/tso.c1
-rw-r--r--net/dccp/Makefile1
-rw-r--r--net/dccp/ccids/ccid2.c10
-rw-r--r--net/dccp/ccids/ccid2.h1
-rw-r--r--net/dccp/ccids/ccid3.c11
-rw-r--r--net/dccp/ccids/ccid3.h1
-rw-r--r--net/dccp/ccids/lib/packet_history.c4
-rw-r--r--net/dccp/ccids/lib/tfrc.c1
-rw-r--r--net/dccp/input.c1
-rw-r--r--net/dccp/ipv4.c13
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/timer.c30
-rw-r--r--net/decnet/Makefile1
-rw-r--r--net/decnet/af_decnet.c7
-rw-r--r--net/decnet/dn_dev.c13
-rw-r--r--net/decnet/dn_fib.c1
-rw-r--r--net/decnet/dn_neigh.c1
-rw-r--r--net/decnet/dn_nsp_in.c8
-rw-r--r--net/decnet/dn_nsp_out.c18
-rw-r--r--net/decnet/dn_route.c8
-rw-r--r--net/decnet/dn_rules.c1
-rw-r--r--net/decnet/dn_table.c1
-rw-r--r--net/decnet/dn_timer.c1
-rw-r--r--net/decnet/sysctl_net_decnet.c1
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa.c68
-rw-r--r--net/dsa/dsa2.c456
-rw-r--r--net/dsa/dsa_priv.h33
-rw-r--r--net/dsa/legacy.c52
-rw-r--r--net/dsa/port.c47
-rw-r--r--net/dsa/slave.c339
-rw-r--r--net/dsa/tag_brcm.c13
-rw-r--r--net/dsa/tag_dsa.c12
-rw-r--r--net/dsa/tag_edsa.c12
-rw-r--r--net/dsa/tag_ksz.c6
-rw-r--r--net/dsa/tag_lan9303.c30
-rw-r--r--net/dsa/tag_mtk.c6
-rw-r--r--net/dsa/tag_qca.c7
-rw-r--r--net/dsa/tag_trailer.c6
-rw-r--r--net/hsr/hsr_device.c9
-rw-r--r--net/hsr/hsr_framereg.c6
-rw-r--r--net/hsr/hsr_framereg.h2
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h1
-rw-r--r--net/ieee802154/6lowpan/reassembly.c5
-rw-r--r--net/ieee802154/Makefile1
-rw-r--r--net/ieee802154/core.h1
-rw-r--r--net/ieee802154/netlink.c6
-rw-r--r--net/ieee802154/nl802154.h1
-rw-r--r--net/ieee802154/rdev-ops.h1
-rw-r--r--net/ieee802154/sysfs.h1
-rw-r--r--net/ieee802154/trace.h1
-rw-r--r--net/ife/ife.c2
-rw-r--r--net/ipv4/Kconfig8
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/arp.c1
-rw-r--r--net/ipv4/cipso_ipv4.c24
-rw-r--r--net/ipv4/devinet.c13
-rw-r--r--net/ipv4/fib_frontend.c18
-rw-r--r--net/ipv4/fib_lookup.h1
-rw-r--r--net/ipv4/fib_notifier.c1
-rw-r--r--net/ipv4/fib_semantics.c16
-rw-r--r--net/ipv4/fib_trie.c42
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/icmp.c20
-rw-r--r--net/ipv4/inet_connection_sock.c31
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/inet_hashtables.c5
-rw-r--r--net/ipv4/inet_timewait_sock.c7
-rw-r--r--net/ipv4/inetpeer.c4
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c6
-rw-r--r--net/ipv4/ip_gre.c18
-rw-r--r--net/ipv4/ip_input.c25
-rw-r--r--net/ipv4/ip_options.c1
-rw-r--r--net/ipv4/ip_sockglue.c1
-rw-r--r--net/ipv4/ip_vti.c12
-rw-r--r--net/ipv4/ipconfig.c1
-rw-r--r--net/ipv4/ipip.c59
-rw-r--r--net/ipv4/ipmr.c1
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c3
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c3
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/route.c53
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c412
-rw-r--r--net/ipv4/tcp.c139
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_fastopen.c88
-rw-r--r--net/ipv4/tcp_input.c367
-rw-r--r--net/ipv4/tcp_ipv4.c70
-rw-r--r--net/ipv4/tcp_metrics.c5
-rw-r--r--net/ipv4/tcp_minisocks.c37
-rw-r--r--net/ipv4/tcp_nv.c47
-rw-r--r--net/ipv4/tcp_output.c318
-rw-r--r--net/ipv4/tcp_recovery.c103
-rw-r--r--net/ipv4/tcp_timer.c77
-rw-r--r--net/ipv4/tcp_vegas.h1
-rw-r--r--net/ipv4/udp.c45
-rw-r--r--net/ipv4/udp_impl.h1
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv4/xfrm4_input.c1
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv4/xfrm4_state.c1
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c359
-rw-r--r--net/ipv6/addrconf_core.c9
-rw-r--r--net/ipv6/addrlabel.c70
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/esp6.c8
-rw-r--r--net/ipv6/exthdrs.c73
-rw-r--r--net/ipv6/exthdrs_core.c3
-rw-r--r--net/ipv6/icmp.c50
-rw-r--r--net/ipv6/ila/ila.h12
-rw-r--r--net/ipv6/ila/ila_common.c104
-rw-r--r--net/ipv6/ila/ila_lwt.c112
-rw-r--r--net/ipv6/ila/ila_xlat.c27
-rw-r--r--net/ipv6/ip6_checksum.c1
-rw-r--r--net/ipv6/ip6_fib.c667
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6_gre.c21
-rw-r--r--net/ipv6/ip6_icmp.c1
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/ip6_tunnel.c12
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/ip6mr.c1
-rw-r--r--net/ipv6/ndisc.c9
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c5
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c3
-rw-r--r--net/ipv6/ping.c5
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/reassembly.c5
-rw-r--r--net/ipv6/route.c939
-rw-r--r--net/ipv6/sit.c31
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c33
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/ipv6/udp_impl.h1
-rw-r--r--net/ipv6/xfrm6_input.c1
-rw-r--r--net/ipv6/xfrm6_policy.c2
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/ipx/af_ipx.c1
-rw-r--r--net/ipx/ipx_proc.c1
-rw-r--r--net/ipx/ipx_route.c1
-rw-r--r--net/ipx/pe2.c1
-rw-r--r--net/ipx/sysctl_net_ipx.c1
-rw-r--r--net/kcm/kcmproc.c1
-rw-r--r--net/l2tp/Makefile1
-rw-r--r--net/l2tp/l2tp_core.c91
-rw-r--r--net/l2tp/l2tp_core.h43
-rw-r--r--net/l2tp/l2tp_debugfs.c4
-rw-r--r--net/l2tp/l2tp_eth.c151
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ip6.c4
-rw-r--r--net/l2tp/l2tp_netlink.c24
-rw-r--r--net/l2tp/l2tp_ppp.c254
-rw-r--r--net/lapb/lapb_iface.c4
-rw-r--r--net/lapb/lapb_timer.c18
-rw-r--r--net/llc/llc_c_ac.c27
-rw-r--r--net/llc/llc_conn.c12
-rw-r--r--net/llc/sysctl_net_llc.c1
-rw-r--r--net/mac80211/Makefile4
-rw-r--r--net/mac80211/aead_api.c (renamed from net/mac80211/aes_ccm.c)40
-rw-r--r--net/mac80211/aead_api.h27
-rw-r--r--net/mac80211/aes_ccm.h42
-rw-r--r--net/mac80211/aes_gcm.c109
-rw-r--r--net/mac80211/aes_gcm.h38
-rw-r--r--net/mac80211/agg-rx.c4
-rw-r--r--net/mac80211/cfg.c12
-rw-r--r--net/mac80211/debug.h1
-rw-r--r--net/mac80211/debugfs.h1
-rw-r--r--net/mac80211/debugfs_key.h1
-rw-r--r--net/mac80211/debugfs_netdev.h1
-rw-r--r--net/mac80211/debugfs_sta.h1
-rw-r--r--net/mac80211/driver-ops.h1
-rw-r--r--net/mac80211/ht.c12
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c29
-rw-r--r--net/mac80211/key.c54
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/mesh.h1
-rw-r--r--net/mac80211/mesh_hwmp.c8
-rw-r--r--net/mac80211/mesh_plink.c13
-rw-r--r--net/mac80211/mlme.c33
-rw-r--r--net/mac80211/pm.c1
-rw-r--r--net/mac80211/scan.c37
-rw-r--r--net/mac80211/sta_info.c61
-rw-r--r--net/mac80211/sta_info.h4
-rw-r--r--net/mac80211/trace.c1
-rw-r--r--net/mac80211/trace.h1
-rw-r--r--net/mac80211/trace_msg.h1
-rw-r--r--net/mac80211/tx.c34
-rw-r--r--net/mac80211/util.c25
-rw-r--r--net/mac80211/vht.c10
-rw-r--r--net/mac80211/wpa.c4
-rw-r--r--net/mac802154/cfg.h1
-rw-r--r--net/mac802154/driver-ops.h1
-rw-r--r--net/mac802154/trace.c1
-rw-r--r--net/mac802154/trace.h1
-rw-r--r--net/mpls/Kconfig1
-rw-r--r--net/mpls/af_mpls.c36
-rw-r--r--net/mpls/internal.h1
-rw-r--r--net/ncsi/internal.h1
-rw-r--r--net/ncsi/ncsi-aen.c2
-rw-r--r--net/ncsi/ncsi-manage.c54
-rw-r--r--net/ncsi/ncsi-rsp.c2
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h10
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c29
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h12
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c4
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c12
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c8
-rw-r--r--net/netfilter/nf_internals.h1
-rw-r--r--net/netfilter/nf_sockopt.c1
-rw-r--r--net/netfilter/nf_tables_api.c10
-rw-r--r--net/netfilter/nft_set_hash.c1
-rw-r--r--net/netfilter/x_tables.c4
-rw-r--r--net/netfilter/xt_bpf.c22
-rw-r--r--net/netfilter/xt_repldata.h1
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/netlabel/Makefile1
-rw-r--r--net/netlink/af_netlink.c46
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/netlink/genetlink.c1
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_in.c2
-rw-r--r--net/netrom/nr_route.c62
-rw-r--r--net/netrom/nr_timer.c48
-rw-r--r--net/nfc/Makefile1
-rw-r--r--net/nfc/nci/Makefile1
-rw-r--r--net/openvswitch/Makefile1
-rw-r--r--net/openvswitch/actions.c4
-rw-r--r--net/openvswitch/conntrack.c12
-rw-r--r--net/openvswitch/conntrack.h7
-rw-r--r--net/openvswitch/datapath.c47
-rw-r--r--net/openvswitch/datapath.h4
-rw-r--r--net/openvswitch/dp_notify.c4
-rw-r--r--net/openvswitch/flow_netlink.c56
-rw-r--r--net/openvswitch/vport-netdev.c3
-rw-r--r--net/packet/af_packet.c58
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/phonet/Makefile1
-rw-r--r--net/phonet/af_phonet.c17
-rw-r--r--net/phonet/datagram.c2
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/psample/psample.c2
-rw-r--r--net/qrtr/qrtr.c377
-rw-r--r--net/qrtr/qrtr.h1
-rw-r--r--net/rds/Makefile1
-rw-r--r--net/rds/ib.c11
-rw-r--r--net/rds/ib.h3
-rw-r--r--net/rds/ib_rdma.c4
-rw-r--r--net/rds/ib_send.c16
-rw-r--r--net/rds/info.h1
-rw-r--r--net/rds/loop.h1
-rw-r--r--net/rds/rdma_transport.h1
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/rds_single_path.h1
-rw-r--r--net/rds/tcp.h1
-rw-r--r--net/rose/af_rose.c17
-rw-r--r--net/rose/rose_in.c1
-rw-r--r--net/rose/rose_link.c16
-rw-r--r--net/rose/rose_loopback.c9
-rw-r--r--net/rose/rose_route.c10
-rw-r--r--net/rose/rose_timer.c39
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c31
-rw-r--r--net/rxrpc/input.c1
-rw-r--r--net/rxrpc/peer_object.c13
-rw-r--r--net/rxrpc/recvmsg.c5
-rw-r--r--net/rxrpc/sendmsg.c108
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_api.c225
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ife.c155
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_meta_mark.c2
-rw-r--r--net/sched/act_meta_skbprio.c2
-rw-r--r--net/sched/act_meta_skbtcindex.c2
-rw-r--r--net/sched/act_mirred.c15
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c5
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c366
-rw-r--r--net/sched/cls_basic.c20
-rw-r--r--net/sched/cls_bpf.c57
-rw-r--r--net/sched/cls_cgroup.c22
-rw-r--r--net/sched/cls_flow.c33
-rw-r--r--net/sched/cls_flower.c68
-rw-r--r--net/sched/cls_fw.c24
-rw-r--r--net/sched/cls_matchall.c77
-rw-r--r--net/sched/cls_route.c19
-rw-r--r--net/sched/cls_rsvp.h19
-rw-r--r--net/sched/cls_tcindex.c43
-rw-r--r--net/sched/cls_u32.c108
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c8
-rw-r--r--net/sched/sch_atm.c4
-rw-r--r--net/sched/sch_cbq.c3
-rw-r--r--net/sched/sch_cbs.c373
-rw-r--r--net/sched/sch_drr.c3
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_fq_codel.c3
-rw-r--r--net/sched/sch_generic.c60
-rw-r--r--net/sched/sch_hfsc.c5
-rw-r--r--net/sched/sch_htb.c5
-rw-r--r--net/sched/sch_ingress.c49
-rw-r--r--net/sched/sch_mq.c10
-rw-r--r--net/sched/sch_mqprio.c273
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c14
-rw-r--r--net/sched/sch_pie.c10
-rw-r--r--net/sched/sch_prio.c3
-rw-r--r--net/sched/sch_qfq.c3
-rw-r--r--net/sched/sch_red.c89
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_sfq.c13
-rw-r--r--net/sctp/Makefile1
-rw-r--r--net/sctp/associola.c3
-rw-r--r--net/sctp/input.c24
-rw-r--r--net/sctp/ipv6.c8
-rw-r--r--net/sctp/protocol.c7
-rw-r--r--net/sctp/sctp_diag.c4
-rw-r--r--net/sctp/sm_make_chunk.c9
-rw-r--r--net/sctp/sm_sideeffect.c93
-rw-r--r--net/sctp/socket.c36
-rw-r--r--net/sctp/stream.c32
-rw-r--r--net/sctp/stream_sched.c2
-rw-r--r--net/sctp/transport.c13
-rw-r--r--net/sctp/ulpevent.c2
-rw-r--r--net/smc/af_smc.c42
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c1
-rw-r--r--net/smc/smc_cdc.h1
-rw-r--r--net/smc/smc_clc.c1
-rw-r--r--net/smc/smc_clc.h1
-rw-r--r--net/smc/smc_close.c4
-rw-r--r--net/smc/smc_close.h1
-rw-r--r--net/smc/smc_core.c13
-rw-r--r--net/smc/smc_core.h1
-rw-r--r--net/smc/smc_ib.c27
-rw-r--r--net/smc/smc_ib.h1
-rw-r--r--net/smc/smc_llc.c1
-rw-r--r--net/smc/smc_llc.h1
-rw-r--r--net/smc/smc_pnet.c1
-rw-r--r--net/smc/smc_pnet.h1
-rw-r--r--net/smc/smc_rx.c1
-rw-r--r--net/smc/smc_rx.h1
-rw-r--r--net/smc/smc_tx.c1
-rw-r--r--net/smc/smc_tx.h1
-rw-r--r--net/smc/smc_wr.c1
-rw-r--r--net/smc/smc_wr.h1
-rw-r--r--net/strparser/strparser.c17
-rw-r--r--net/sunrpc/Makefile1
-rw-r--r--net/sunrpc/auth_gss/Makefile1
-rw-r--r--net/sunrpc/auth_null.c1
-rw-r--r--net/sunrpc/auth_unix.c1
-rw-r--r--net/sunrpc/debugfs.c1
-rw-r--r--net/sunrpc/netns.h1
-rw-r--r--net/sunrpc/sched.c8
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svc_xprt.c9
-rw-r--r--net/sunrpc/xprt.c45
-rw-r--r--net/sunrpc/xprtmultipath.c1
-rw-r--r--net/sunrpc/xprtrdma/Makefile1
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c1
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c1
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c3
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c1
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c1
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/tipc/Makefile3
-rw-r--r--net/tipc/bcast.c22
-rw-r--r--net/tipc/core.h5
-rw-r--r--net/tipc/discover.c6
-rw-r--r--net/tipc/group.c871
-rw-r--r--net/tipc/group.h73
-rw-r--r--net/tipc/link.c8
-rw-r--r--net/tipc/monitor.c17
-rw-r--r--net/tipc/msg.c17
-rw-r--r--net/tipc/msg.h118
-rw-r--r--net/tipc/name_table.c176
-rw-r--r--net/tipc/name_table.h28
-rw-r--r--net/tipc/node.c50
-rw-r--r--net/tipc/node.h5
-rw-r--r--net/tipc/server.c121
-rw-r--r--net/tipc/server.h5
-rw-r--r--net/tipc/socket.c845
-rw-r--r--net/tipc/subscr.c6
-rw-r--r--net/unix/Makefile1
-rw-r--r--net/unix/af_unix.c1
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/vmw_vsock/Kconfig10
-rw-r--r--net/vmw_vsock/Makefile4
-rw-r--r--net/vmw_vsock/af_vsock.c73
-rw-r--r--net/vmw_vsock/diag.c186
-rw-r--r--net/vmw_vsock/hyperv_transport.c33
-rw-r--r--net/vmw_vsock/virtio_transport.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c22
-rw-r--r--net/vmw_vsock/vmci_transport.c34
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c2
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c2
-rw-r--r--net/wimax/Makefile1
-rw-r--r--net/wireless/.gitignore3
-rw-r--r--net/wireless/Kconfig58
-rw-r--r--net/wireless/Makefile25
-rw-r--r--net/wireless/ap.c1
-rw-r--r--net/wireless/certs/sforshee.x509bin0 -> 680 bytes
-rw-r--r--net/wireless/chan.c5
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/core.h6
-rw-r--r--net/wireless/db.txt17
-rw-r--r--net/wireless/debugfs.h1
-rw-r--r--net/wireless/ethtool.c1
-rw-r--r--net/wireless/genregdb.awk158
-rw-r--r--net/wireless/ibss.c1
-rw-r--r--net/wireless/mesh.c1
-rw-r--r--net/wireless/mlme.c1
-rw-r--r--net/wireless/nl80211.c213
-rw-r--r--net/wireless/nl80211.h3
-rw-r--r--net/wireless/rdev-ops.h1
-rw-r--r--net/wireless/reg.c452
-rw-r--r--net/wireless/reg.h14
-rw-r--r--net/wireless/regdb.h23
-rw-r--r--net/wireless/scan.c1
-rw-r--r--net/wireless/sme.c96
-rw-r--r--net/wireless/sysfs.h1
-rw-r--r--net/wireless/trace.h1
-rw-r--r--net/wireless/util.c203
-rw-r--r--net/wireless/wext-compat.c1
-rw-r--r--net/wireless/wext-sme.c1
-rw-r--r--net/x25/Makefile1
-rw-r--r--net/x25/af_x25.c9
-rw-r--r--net/x25/sysctl_net_x25.c1
-rw-r--r--net/x25/x25_facilities.c2
-rw-r--r--net/x25/x25_in.c1
-rw-r--r--net/x25/x25_timer.c18
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_device.c1
-rw-r--r--net/xfrm/xfrm_hash.c1
-rw-r--r--net/xfrm/xfrm_hash.h1
-rw-r--r--net/xfrm/xfrm_input.c7
-rw-r--r--net/xfrm/xfrm_output.c4
-rw-r--r--net/xfrm/xfrm_policy.c34
-rw-r--r--net/xfrm/xfrm_state.c5
-rw-r--r--net/xfrm/xfrm_sysctl.c1
-rw-r--r--net/xfrm/xfrm_user.c131
633 files changed, 12704 insertions, 5907 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h
index a67caee..53cf446 100644
--- a/net/6lowpan/6lowpan_i.h
+++ b/net/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __6LOWPAN_I_H
#define __6LOWPAN_I_H
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index 12d131a..2247b96 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_6LOWPAN) += 6lowpan.o
6lowpan-y := core.o iphc.o nhc.o ndisc.o
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index 8030414..67951c4 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __6LOWPAN_NHC_H
#define __6LOWPAN_NHC_H
diff --git a/net/802/Makefile b/net/802/Makefile
index 37e654d..19406a8 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux 802.x protocol layers.
#
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 7bc8db0..9b70345 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux VLAN layer.
#
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579..a010350 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
return 0;
}
-int register_vlan_dev(struct net_device *dev)
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
@@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_mvrp;
- err = netdev_upper_dev_link(real_dev, dev);
+ err = netdev_upper_dev_link(real_dev, dev, extack);
if (err)
goto out_unregister_netdev;
@@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
vlan->flags = VLAN_FLAG_REORDER_HDR;
new_dev->rtnl_link_ops = &vlan_link_ops;
- err = register_vlan_dev(new_dev);
+ err = register_vlan_dev(new_dev, NULL);
if (err < 0)
goto out_free_newdev;
@@ -328,6 +328,9 @@ static void vlan_transfer_features(struct net_device *dev,
vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid;
#endif
+ vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
+
netdev_update_features(vlandev);
}
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index df8bd65..a8ba510 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BEN_VLAN_802_1Q_INC__
#define __BEN_VLAN_802_1Q_INC__
@@ -107,7 +108,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
int vlan_check_real_dev(struct net_device *real_dev,
__be16 protocol, u16 vlan_id);
void vlan_setup(struct net_device *dev);
-int register_vlan_dev(struct net_device *dev);
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e2ed698..64aa9f7 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
@@ -21,6 +22,12 @@ bool vlan_do_receive(struct sk_buff **skbp)
if (unlikely(!skb))
return false;
+ if (unlikely(!(vlan_dev->flags & IFF_UP))) {
+ kfree_skb(skb);
+ *skbp = NULL;
+ return false;
+ }
+
skb->dev = vlan_dev;
if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) {
/* Our lower layer thinks this is not local, let's make sure.
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 5e831de..6689c0b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -143,6 +143,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
vlan->vlan_proto = proto;
vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
vlan->real_dev = real_dev;
+ dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
vlan->flags = VLAN_FLAG_REORDER_HDR;
err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
@@ -160,7 +161,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;
- return register_vlan_dev(dev);
+ return register_vlan_dev(dev, extack);
}
static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h
index 8838a2e..48cd4b4 100644
--- a/net/8021q/vlanproc.h
+++ b/net/8021q/vlanproc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __BEN_VLAN_PROC_INC__
#define __BEN_VLAN_PROC_INC__
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 697ea7c..c0486cf 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_NET_9P) := 9pnet.o
obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
diff --git a/net/Makefile b/net/Makefile
index ae2fe22..14fede5 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux networking.
#
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index e4158b8..284c8e5 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Moved here from drivers/net/net_init.c, which is:
* Written 1993,1994,1995 by Donald Becker.
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index ebb8643..c744a85 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_atalk.c: sysctl interface to net AppleTalk subsystem.
*
diff --git a/net/atm/Makefile b/net/atm/Makefile
index cc50bd1..bfec0f2 100644
--- a/net/atm/Makefile
+++ b/net/atm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the ATM Protocol Families.
#
diff --git a/net/atm/addr.c b/net/atm/addr.c
index dcda35c..0530b63 100644
--- a/net/atm/addr.c
+++ b/net/atm/addr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/addr.c - Local ATM address registry */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/addr.h b/net/atm/addr.h
index 6837e9e..da3f8484 100644
--- a/net/atm/addr.h
+++ b/net/atm/addr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/addr.h - Local ATM address registry */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index 876fbe8..a30b83c 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/atm_misc.c - Various functions for use by ATM drivers */
/* Written 1995-2000 by Werner Almesberger, EPFL ICA */
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 350bf62..5d2fed9 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* ATM driver model support. */
#include <linux/kernel.h>
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 65f706e..d4f6029 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n)
return 1;
}
-static void idle_timer_check(unsigned long dummy)
+static void idle_timer_check(struct timer_list *unused)
{
write_lock(&arp_tbl.lock);
__neigh_for_each_release(&arp_tbl, neigh_check_cb);
@@ -887,7 +887,7 @@ static int __init atm_clip_init(void)
register_netdevice_notifier(&clip_dev_notifier);
register_inetaddr_notifier(&clip_inet_notifier);
- setup_timer(&idle_timer, idle_timer_check, 0);
+ timer_setup(&idle_timer, idle_timer_check, 0);
#ifdef CONFIG_PROC_FS
{
diff --git a/net/atm/common.h b/net/atm/common.h
index 959436b..d9d5837 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/common.h - ATM sockets (common part for PVC and SVC) */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index bbd3b63..2ff0e5e4 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* ATM ioctl handling */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a3d93a1..c976196 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
#define LEC_ARP_REFRESH_INTERVAL (3*HZ)
static void lec_arp_check_expire(struct work_struct *work);
-static void lec_arp_expire_arp(unsigned long data);
+static void lec_arp_expire_arp(struct timer_list *t);
/*
* Arp table funcs
@@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
}
ether_addr_copy(to_return->mac_addr, mac_addr);
INIT_HLIST_NODE(&to_return->next);
- setup_timer(&to_return->timer, lec_arp_expire_arp,
- (unsigned long)to_return);
+ timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
to_return->last_used = jiffies;
to_return->priv = priv;
skb_queue_head_init(&to_return->tx_wait);
@@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
}
/* Arp sent timer expired */
-static void lec_arp_expire_arp(unsigned long data)
+static void lec_arp_expire_arp(struct timer_list *t)
{
struct lec_arp_table *entry;
- entry = (struct lec_arp_table *)data;
+ entry = from_timer(entry, t, timer);
pr_debug("\n");
if (entry->status == ESI_ARP_PENDING) {
@@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data)
}
/* Unknown/unused vcc expire, remove associated entry */
-static void lec_arp_expire_vcc(unsigned long data)
+static void lec_arp_expire_vcc(struct timer_list *t)
{
unsigned long flags;
- struct lec_arp_table *to_remove = (struct lec_arp_table *)data;
+ struct lec_arp_table *to_remove = from_timer(to_remove, t, timer);
struct lec_priv *priv = to_remove->priv;
del_timer(&to_remove->timer);
@@ -1799,7 +1798,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
else
send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL);
entry->timer.expires = jiffies + (1 * HZ);
- entry->timer.function = lec_arp_expire_arp;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_arp;
add_timer(&entry->timer);
found = priv->mcast_vcc;
}
@@ -1999,7 +1998,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
entry->old_recv_push = old_push;
entry->status = ESI_UNKNOWN;
entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
hlist_add_head(&entry->next, &priv->lec_no_forward);
add_timer(&entry->timer);
dump_arp_table(priv);
@@ -2083,7 +2082,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
entry->status = ESI_UNKNOWN;
hlist_add_head(&entry->next, &priv->lec_arp_empty_ones);
entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
add_timer(&entry->timer);
pr_debug("After vcc was added\n");
dump_arp_table(priv);
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 4149db1..be0e266 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Lan Emulation client header file
*
diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h
index d923f53..1205d87 100644
--- a/net/atm/lec_arpc.h
+++ b/net/atm/lec_arpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Lec arp cache
*
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 5677147..883d257 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -95,7 +95,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb,
static int mpoa_event_listener(struct notifier_block *mpoa_notifier,
unsigned long event, void *dev);
static void mpc_timer_refresh(void);
-static void mpc_cache_check(unsigned long checking_time);
+static void mpc_cache_check(struct timer_list *unused);
static struct llc_snap_hdr llc_snap_mpoa_ctrl = {
0xaa, 0xaa, 0x03,
@@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg)
int err;
if (mpcs == NULL) {
- init_timer(&mpc_timer);
mpc_timer_refresh();
/* This lets us now how our LECs are doing */
@@ -1408,15 +1407,17 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
msg_to_mpoad(msg, mpc);
}
+static unsigned long checking_time;
+
static void mpc_timer_refresh(void)
{
mpc_timer.expires = jiffies + (MPC_P2 * HZ);
- mpc_timer.data = mpc_timer.expires;
- mpc_timer.function = mpc_cache_check;
+ checking_time = mpc_timer.expires;
+ mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check;
add_timer(&mpc_timer);
}
-static void mpc_cache_check(unsigned long checking_time)
+static void mpc_cache_check(struct timer_list *unused)
{
struct mpoa_client *mpc = mpcs;
static unsigned long previous_resolving_check_time;
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index cfc7b74..454abd0 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MPC_H_
#define _MPC_H_
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index 4ccaa16..e01450b 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/atmmpc.h>
#include <linux/slab.h>
diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h
index 30fe348..6a26666 100644
--- a/net/atm/mpoa_caches.h
+++ b/net/atm/mpoa_caches.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MPOA_CACHES_H
#define MPOA_CACHES_H
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 2df34eb..8a0c17e 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
#ifdef CONFIG_PROC_FS
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 4caca2a..642f927 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/proc.c - ATM /proc interface
*
* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA
diff --git a/net/atm/protocols.h b/net/atm/protocols.h
index acdfc85..18d4d00 100644
--- a/net/atm/protocols.h
+++ b/net/atm/protocols.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/protocols.h - ATM protocol handler entry points */
/* Written 1995-1997 by Werner Almesberger, EPFL LRC */
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 040207ec..e1140b3 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/pvc.c - ATM PVC sockets */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/raw.c b/net/atm/raw.c
index 821c079..ee10e8d 100644
--- a/net/atm/raw.c
+++ b/net/atm/raw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/raw.c - Raw AAL0 and AAL5 transports */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 9182447..bada395 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/resources.c - Statically allocated resources */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 521431e..048232e 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/resources.h - ATM-related resources */
/* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 0a20f6e..6c11cdf 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/signaling.c - ATM signaling */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/signaling.h b/net/atm/signaling.h
index 08b2a69..2df8220 100644
--- a/net/atm/signaling.h
+++ b/net/atm/signaling.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* net/atm/signaling.h - ATM signaling */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 5589de7..c458adc 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* net/atm/svc.c - ATM SVC sockets */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
diff --git a/net/ax25/Makefile b/net/ax25/Makefile
index 43c46d2..2e53aff 100644
--- a/net/ax25/Makefile
+++ b/net/ax25/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux AX.25 layer.
#
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index f3f9d18..06eac1f 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -268,9 +268,9 @@ void ax25_destroy_socket(ax25_cb *);
/*
* Handler for deferred kills.
*/
-static void ax25_destroy_timer(unsigned long data)
+static void ax25_destroy_timer(struct timer_list *t)
{
- ax25_cb *ax25=(ax25_cb *)data;
+ ax25_cb *ax25 = from_timer(ax25, t, dtimer);
struct sock *sk;
sk=ax25->sk;
@@ -326,8 +326,7 @@ void ax25_destroy_socket(ax25_cb *ax25)
if (ax25->sk != NULL) {
if (sk_has_allocations(ax25->sk)) {
/* Defer: outstanding buffers */
- setup_timer(&ax25->dtimer, ax25_destroy_timer,
- (unsigned long)ax25);
+ timer_setup(&ax25->dtimer, ax25_destroy_timer, 0);
ax25->dtimer.expires = jiffies + 2 * HZ;
add_timer(&ax25->dtimer);
} else {
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 5fb2104b..e9d1131 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -29,7 +29,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
-static void ax25_ds_timeout(unsigned long);
+static void ax25_ds_timeout(struct timer_list *);
/*
* Add DAMA slave timeout timer to timer list.
@@ -41,8 +41,7 @@ static void ax25_ds_timeout(unsigned long);
void ax25_ds_setup_timer(ax25_dev *ax25_dev)
{
- setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout,
- (unsigned long)ax25_dev);
+ timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0);
}
void ax25_ds_del_timer(ax25_dev *ax25_dev)
@@ -66,9 +65,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev)
* Silently discard all (slave) connections in case our master forgot us...
*/
-static void ax25_ds_timeout(unsigned long arg)
+static void ax25_ds_timeout(struct timer_list *t)
{
- ax25_dev *ax25_dev = (struct ax25_dev *) arg;
+ ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer);
ax25_cb *ax25;
if (ax25_dev == NULL || !ax25_dev->dama.slave)
diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c
index 23a6f38..c47b7ee 100644
--- a/net/ax25/ax25_timer.c
+++ b/net/ax25/ax25_timer.c
@@ -33,20 +33,19 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
-static void ax25_heartbeat_expiry(unsigned long);
-static void ax25_t1timer_expiry(unsigned long);
-static void ax25_t2timer_expiry(unsigned long);
-static void ax25_t3timer_expiry(unsigned long);
-static void ax25_idletimer_expiry(unsigned long);
+static void ax25_heartbeat_expiry(struct timer_list *);
+static void ax25_t1timer_expiry(struct timer_list *);
+static void ax25_t2timer_expiry(struct timer_list *);
+static void ax25_t3timer_expiry(struct timer_list *);
+static void ax25_idletimer_expiry(struct timer_list *);
void ax25_setup_timers(ax25_cb *ax25)
{
- setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25);
- setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25);
- setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25);
- setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25);
- setup_timer(&ax25->idletimer, ax25_idletimer_expiry,
- (unsigned long)ax25);
+ timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0);
+ timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0);
+ timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0);
+ timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0);
+ timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0);
}
void ax25_start_heartbeat(ax25_cb *ax25)
@@ -120,10 +119,10 @@ unsigned long ax25_display_timer(struct timer_list *timer)
EXPORT_SYMBOL(ax25_display_timer);
-static void ax25_heartbeat_expiry(unsigned long param)
+static void ax25_heartbeat_expiry(struct timer_list *t)
{
int proto = AX25_PROTO_STD_SIMPLEX;
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, timer);
if (ax25->ax25_dev)
proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL];
@@ -145,9 +144,9 @@ static void ax25_heartbeat_expiry(unsigned long param)
}
}
-static void ax25_t1timer_expiry(unsigned long param)
+static void ax25_t1timer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, t1timer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
@@ -164,9 +163,9 @@ static void ax25_t1timer_expiry(unsigned long param)
}
}
-static void ax25_t2timer_expiry(unsigned long param)
+static void ax25_t2timer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, t2timer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
@@ -183,9 +182,9 @@ static void ax25_t2timer_expiry(unsigned long param)
}
}
-static void ax25_t3timer_expiry(unsigned long param)
+static void ax25_t3timer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, t3timer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
@@ -204,9 +203,9 @@ static void ax25_t3timer_expiry(unsigned long param)
}
}
-static void ax25_idletimer_expiry(unsigned long param)
+static void ax25_idletimer_expiry(struct timer_list *t)
{
- ax25_cb *ax25 = (ax25_cb *)param;
+ ax25_cb *ax25 = from_timer(ax25, t, idletimer);
switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) {
case AX25_PROTO_STD_SIMPLEX:
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 83ba548..1b659ab 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -916,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
u16 tvlv_len = 0;
unsigned long send_time;
- if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
- (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
return;
/* the interface gets activated here to avoid race conditions between
@@ -1264,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
* drops as they can't send and receive at the same time.
*/
tq_iface_penalty = BATADV_TQ_MAX_VALUE;
- if (if_outgoing && (if_incoming == if_outgoing) &&
+ if (if_outgoing && if_incoming == if_outgoing &&
batadv_is_wifi_hardif(if_outgoing))
tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
bat_priv);
@@ -1369,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
ret = BATADV_NEIGH_DUP;
} else {
set_mark = 0;
- if (is_dup && (ret != BATADV_NEIGH_DUP))
+ if (is_dup && ret != BATADV_NEIGH_DUP)
ret = BATADV_ORIG_DUP;
}
@@ -1515,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
/* drop packet if sender is not a direct neighbor and if we
* don't route towards it
*/
- if (!is_single_hop_neigh && (!orig_neigh_router)) {
+ if (!is_single_hop_neigh && !orig_neigh_router) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: OGM via unknown neighbor!\n");
goto out_neigh;
@@ -1535,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
- if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+ if (is_bidirect && (dup_status == BATADV_NO_DUP ||
(sameseq && similar_ttl))) {
batadv_iv_ogm_orig_update(bat_priv, orig_node,
orig_ifinfo, ethhdr,
@@ -1553,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
/* OGMs from secondary interfaces should only scheduled once
* per interface where it has been received, not multiple times
*/
- if ((ogm_packet->ttl <= 2) &&
- (if_incoming != if_outgoing)) {
+ if (ogm_packet->ttl <= 2 &&
+ if_incoming != if_outgoing) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: OGM from secondary interface and wrong outgoing interface\n");
goto out_neigh;
@@ -1590,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
if_incoming, if_outgoing);
out_neigh:
- if ((orig_neigh_node) && (!is_single_hop_neigh))
+ if (orig_neigh_node && !is_single_hop_neigh)
batadv_orig_node_put(orig_neigh_node);
out:
if (router_ifinfo)
@@ -2523,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
tmp_gw_factor *= 100 * 100;
tmp_gw_factor >>= 18;
- if ((tmp_gw_factor > max_gw_factor) ||
- ((tmp_gw_factor == max_gw_factor) &&
- (tq_avg > max_tq))) {
+ if (tmp_gw_factor > max_gw_factor ||
+ (tmp_gw_factor == max_gw_factor &&
+ tq_avg > max_tq)) {
if (curr_gw)
batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4e2724c..341ceab 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -19,7 +19,6 @@
#include "main.h"
#include <linux/atomic.h>
-#include <linux/bug.h>
#include <linux/cache.h>
#include <linux/errno.h>
#include <linux/if_ether.h>
@@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
int ret = 0;
ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
- if (WARN_ON(!ifinfo1))
+ if (!ifinfo1)
goto err_ifinfo1;
ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
- if (WARN_ON(!ifinfo2))
+ if (!ifinfo2)
goto err_ifinfo2;
ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput;
@@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1,
bool ret = false;
ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
- if (WARN_ON(!ifinfo1))
+ if (!ifinfo1)
goto err_ifinfo1;
ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
- if (WARN_ON(!ifinfo2))
+ if (!ifinfo2)
goto err_ifinfo2;
threshold = ifinfo1->bat_v.throughput / 4;
@@ -767,7 +766,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
goto next;
- if (curr_gw && (bw <= max_bw))
+ if (curr_gw && bw <= max_bw)
goto next;
if (curr_gw)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index bd1064d..1de992c 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
throughput = link_settings.base.speed;
- if (throughput && (throughput != SPEED_UNKNOWN))
+ if (throughput && throughput != SPEED_UNKNOWN)
return throughput * 10;
}
@@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
goto out;
/* we are in the process of shutting this interface down */
- if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
- (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
goto out;
/* the interface was enabled but may not be ready yet */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 8be6173..c251445 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
* due to the store & forward characteristics of WIFI.
* Very low throughput values are the exception.
*/
- if ((throughput > 10) &&
- (if_incoming == if_outgoing) &&
+ if (throughput > 10 &&
+ if_incoming == if_outgoing &&
!(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
return throughput / 2;
@@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
/* drop packets with old seqnos, however accept the first packet after
* a host has been rebooted.
*/
- if ((seq_diff < 0) && !protection_started)
+ if (seq_diff < 0 && !protection_started)
goto out;
neigh_node->last_seen = jiffies;
@@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
router_throughput = router_ifinfo->bat_v.throughput;
neigh_throughput = neigh_ifinfo->bat_v.throughput;
- if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
- (router_throughput >= neigh_throughput))
+ if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF &&
+ router_throughput >= neigh_throughput)
goto out;
}
@@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
return;
/* only unknown & newer OGMs contain TVLVs we are interested in */
- if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
+ if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
batadv_tvlv_containers_process(bat_priv, true, orig_node,
NULL, NULL,
(unsigned char *)(ogm2 + 1),
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b6cfa78..760c0de 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
/* this is an hash collision with the temporary selected node. Choose
* the one with the lowest address
*/
- if ((tmp_max == max) && max_orig_node &&
- (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
+ if (tmp_max == max && max_orig_node &&
+ batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
goto out;
ret = true;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de9955d..10d521f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
}
}
- if ((curr_gw) && (!next_gw)) {
+ if (curr_gw && !next_gw) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Removing selected gateway - no gateway in range\n");
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL,
NULL);
- } else if ((!curr_gw) && (next_gw)) {
+ } else if (!curr_gw && next_gw) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
next_gw->orig_node->orig,
@@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
goto out;
}
- if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
- (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
+ if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) &&
+ gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
goto out;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 33940c5..2c26039 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
bw_unit_type = BATADV_BW_UNIT_MBIT;
- if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) ||
- (bw_unit_type == BATADV_BW_UNIT_MBIT))
+ if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
+ bw_unit_type == BATADV_BW_UNIT_MBIT)
*tmp_ptr = '\0';
}
@@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
if (!up_new)
up_new = 1;
- if ((down_curr == down_new) && (up_curr == up_new))
+ if (down_curr == down_new && up_curr == up_new)
return count;
batadv_gw_reselect(bat_priv);
@@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
/* only fetch the tvlv value if the handler wasn't called via the
* CIFNOTFND flag and if there is data to fetch
*/
- if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) ||
- (tvlv_value_len < sizeof(gateway))) {
+ if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND ||
+ tvlv_value_len < sizeof(gateway)) {
gateway.bandwidth_down = 0;
gateway.bandwidth_up = 0;
} else {
gateway_ptr = tvlv_value;
gateway.bandwidth_down = gateway_ptr->bandwidth_down;
gateway.bandwidth_up = gateway_ptr->bandwidth_up;
- if ((gateway.bandwidth_down == 0) ||
- (gateway.bandwidth_up == 0)) {
+ if (gateway.bandwidth_down == 0 ||
+ gateway.bandwidth_up == 0) {
gateway.bandwidth_down = 0;
gateway.bandwidth_up = 0;
}
@@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
batadv_gw_node_update(bat_priv, orig, &gateway);
/* restart gateway selection */
- if ((gateway.bandwidth_down != 0) &&
- (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
+ if (gateway.bandwidth_down != 0 &&
+ atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)
batadv_gw_check_election(bat_priv, orig);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76..4e3d534 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
continue;
if (hard_iface->net_dev == net_dev)
@@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
continue;
if (hard_iface->soft_iface != soft_iface)
@@ -654,8 +654,8 @@ out:
static void
batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
{
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
return;
hard_iface->if_status = BATADV_IF_INACTIVE;
@@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
bat_priv = netdev_priv(hard_iface->soft_iface);
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
- soft_iface, NULL, NULL);
+ soft_iface, NULL, NULL, NULL);
if (ret)
goto err_dev;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 8ead292..bded311 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
size_t packet_len;
int error;
- if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
+ if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0)
return -EAGAIN;
- if ((!buf) || (count < sizeof(struct batadv_icmp_packet)))
+ if (!buf || count < sizeof(struct batadv_icmp_packet))
return -EINVAL;
if (!access_ok(VERIFY_WRITE, buf, count))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fb381fb..4daed7ad 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -73,8 +73,8 @@
* list traversals just rcu-locked
*/
struct list_head batadv_hardif_list;
-static int (*batadv_rx_handler[256])(struct sk_buff *,
- struct batadv_hard_iface *);
+static int (*batadv_rx_handler[256])(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@@ -540,12 +540,12 @@ batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *))
{
- int (*curr)(struct sk_buff *,
- struct batadv_hard_iface *);
+ int (*curr)(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
curr = batadv_rx_handler[packet_type];
- if ((curr != batadv_recv_unhandled_packet) &&
- (curr != batadv_recv_unhandled_unicast_packet))
+ if (curr != batadv_recv_unhandled_packet &&
+ curr != batadv_recv_unhandled_unicast_packet)
return -EBUSY;
batadv_rx_handler[packet_type] = recv_handler;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 05cc763..edb2f23 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.3"
+#define BATADV_SOURCE_VERSION "2017.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d327670..e553a87 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
bool orig_initialized;
if (orig_mcast_enabled && tvlv_value &&
- (tvlv_value_len >= sizeof(mcast_flags)))
+ tvlv_value_len >= sizeof(mcast_flags))
mcast_flags = *(u8 *)tvlv_value;
spin_lock_bh(&orig->mcast_handler_lock);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8e2a4b2..2967b86 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
continue;
/* don't purge if the interface is not (going) down */
- if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
- (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
- (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+ if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+ if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
continue;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
continue;
/* don't purge if the interface is not (going) down */
- if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
- (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
- (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+ if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+ if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
continue;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
last_seen = neigh_node->last_seen;
if_incoming = neigh_node->if_incoming;
- if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) ||
- (if_incoming->if_status == BATADV_IF_INACTIVE) ||
- (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
- (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) {
- if ((if_incoming->if_status == BATADV_IF_INACTIVE) ||
- (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
- (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) ||
+ if_incoming->if_status == BATADV_IF_INACTIVE ||
+ if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+ if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) {
+ if (if_incoming->if_status == BATADV_IF_INACTIVE ||
+ if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+ if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"neighbor purge: originator %pM, neighbor: %pM, iface: %s\n",
orig_node->orig, neigh_node->addr,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f10e3ff..40d9bf3 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
batadv_orig_ifinfo_put(orig_ifinfo);
/* route deleted */
- if ((curr_router) && (!neigh_node)) {
+ if (curr_router && !neigh_node) {
batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
"Deleting route towards: %pM\n", orig_node->orig);
batadv_tt_global_del_orig(bat_priv, orig_node, -1,
"Deleted route towards originator");
/* route added */
- } else if ((!curr_router) && (neigh_node)) {
+ } else if (!curr_router && neigh_node) {
batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
"Adding route towards: %pM (via %pM)\n",
orig_node->orig, neigh_node->addr);
@@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
/* add record route information if not full */
if ((icmph->msg_type == BATADV_ECHO_REPLY ||
icmph->msg_type == BATADV_ECHO_REQUEST) &&
- (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+ skb->len >= sizeof(struct batadv_icmp_packet_rr)) {
if (skb_linearize(skb) < 0)
goto free_skb;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 054a65e..7895323 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
- if ((hardif_neigh) && (ret != NET_XMIT_DROP))
+ if (hardif_neigh && ret != NET_XMIT_DROP)
hardif_neigh->bat_v.last_unicast_tx = jiffies;
if (hardif_neigh)
@@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
* we delete only packets belonging to the given interface
*/
if (hard_iface &&
- (forw_packet->if_incoming != hard_iface) &&
- (forw_packet->if_outgoing != hard_iface))
+ forw_packet->if_incoming != hard_iface &&
+ forw_packet->if_outgoing != hard_iface)
continue;
hlist_del(&forw_packet->list);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index c2c9867..9f673cd 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
int result;
/* TODO: We must check if we can release all references to non-payload
- * data using __skb_header_release in our skbs to allow skb_cow_header to
- * work optimally. This means that those skbs are not allowed to read
+ * data using __skb_header_release in our skbs to allow skb_cow_header
+ * to work optimally. This means that those skbs are not allowed to read
* or write any data which is before the current position of skb->data
* after that call and thus allow other skbs with the same data buffer
* to write freely in that area.
@@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
/* check ranges */
- if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
+ if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
@@ -863,11 +863,13 @@ free_bat_counters:
* batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
* @dev: batadv_soft_interface used as master interface
* @slave_dev: net_device which should become the slave interface
+ * @extack: extended ACK report struct
*
* Return: 0 if successful or error otherwise.
*/
static int batadv_softif_slave_add(struct net_device *dev,
- struct net_device *slave_dev)
+ struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct batadv_hard_iface *hard_iface;
struct net *net = dev_net(dev);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0ae8b30..aa187fd 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
if (hard_iface->if_status == status_tmp)
goto out;
- if ((hard_iface->soft_iface) &&
- (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
+ if (hard_iface->soft_iface &&
+ strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)
goto out;
if (status_tmp == BATADV_IF_NOT_IN_USE) {
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bfe8eff..4b90033 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
/* send the ack */
r = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+ if (unlikely(r < 0) || r == NET_XMIT_DROP) {
ret = BATADV_TP_REASON_DST_UNREACHABLE;
goto out;
}
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index c18115d..db82a40 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -126,14 +126,4 @@ config BT_DEBUGFS
Provide extensive information about internal Bluetooth states
in debugfs.
-config BT_LEGACY_IOCTL
- bool "Enable legacy ioctl interfaces"
- depends on BT && BT_BREDR
- default y
- help
- Enable support for legacy ioctl interfaces. This is only needed
- for old and deprecated applications using direct ioctl calls for
- controller management. Since Linux 3.4 all configuration and
- setup is done via mgmt interface and this is no longer needed.
-
source "drivers/bluetooth/Kconfig"
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 5d0a113..fda41c0 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux Bluetooth subsystem.
#
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index aad994e..51c2cf2 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -573,7 +573,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK,
&mgr->l2cap_conn->hcon->dst);
if (!hcon) {
- BT_ERR("No phys link exist");
+ bt_dev_err(hdev, "no phys link exist");
rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS;
goto clean;
}
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index ebcab5b..78bec8d 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -187,7 +187,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
/* Legacy key */
if (conn->key_type < 3) {
- BT_ERR("Legacy key type %d", conn->key_type);
+ bt_dev_err(hdev, "legacy key type %d", conn->key_type);
return -EACCES;
}
@@ -207,7 +207,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
/* Derive Generic AMP Link Key (gamp) */
err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key);
if (err) {
- BT_ERR("Could not derive Generic AMP Key: err %d", err);
+ bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err);
return err;
}
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index c7b1a9a..2155ce8 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -23,7 +23,6 @@
#include "ecdh_helper.h"
#include <linux/scatterlist.h>
-#include <crypto/kpp.h>
#include <crypto/ecdh.h>
struct ecdh_completion {
@@ -50,55 +49,35 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
out[i] = __swab64(in[ndigits - 1 - i]);
}
-bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
- u8 secret[32])
+/* compute_ecdh_secret() - function assumes that the private key was
+ * already set.
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: pair's ecc public key.
+ * secret: memory where the ecdh computed shared secret will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
+ u8 secret[32])
{
- struct crypto_kpp *tfm;
struct kpp_request *req;
- struct ecdh p;
+ u8 *tmp;
struct ecdh_completion result;
struct scatterlist src, dst;
- u8 *tmp, *buf;
- unsigned int buf_len;
- int err = -ENOMEM;
+ int err;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
- return false;
+ return -ENOMEM;
- tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
- if (IS_ERR(tfm)) {
- pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
- PTR_ERR(tfm));
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
goto free_tmp;
}
- req = kpp_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto free_kpp;
-
init_completion(&result.completion);
- /* Security Manager Protocol holds digits in litte-endian order
- * while ECC API expect big-endian data
- */
- swap_digits((u64 *)private_key, (u64 *)tmp, 4);
- p.key = (char *)tmp;
- p.key_size = 32;
- /* Set curve_id */
- p.curve_id = ECC_CURVE_NIST_P256;
- buf_len = crypto_ecdh_key_len(&p);
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- goto free_req;
-
- crypto_ecdh_encode_key(buf, buf_len, &p);
-
- /* Set A private Key */
- err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len);
- if (err)
- goto free_all;
-
swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
@@ -123,104 +102,129 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
memcpy(secret, tmp, 32);
free_all:
- kzfree(buf);
-free_req:
kpp_request_free(req);
-free_kpp:
- crypto_free_kpp(tfm);
free_tmp:
- kfree(tmp);
- return (err == 0);
+ kzfree(tmp);
+ return err;
}
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32])
+/* set_ecdh_privkey() - set or generate ecc private key.
+ *
+ * Function generates an ecc private key in the crypto subsystem when receiving
+ * a NULL private key or sets the received key when not NULL.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @private_key: user's ecc private key. When not NULL, the key is expected
+ * in little endian format.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32])
+{
+ u8 *buf, *tmp = NULL;
+ unsigned int buf_len;
+ int err;
+ struct ecdh p = {0};
+
+ p.curve_id = ECC_CURVE_NIST_P256;
+
+ if (private_key) {
+ tmp = kmalloc(32, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+ swap_digits((u64 *)private_key, (u64 *)tmp, 4);
+ p.key = tmp;
+ p.key_size = 32;
+ }
+
+ buf_len = crypto_ecdh_key_len(&p);
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ err = -ENOMEM;
+ goto free_tmp;
+ }
+
+ err = crypto_ecdh_encode_key(buf, buf_len, &p);
+ if (err)
+ goto free_all;
+
+ err = crypto_kpp_set_secret(tfm, buf, buf_len);
+ /* fall through */
+free_all:
+ kzfree(buf);
+free_tmp:
+ kzfree(tmp);
+ return err;
+}
+
+/* generate_ecdh_public_key() - function assumes that the private key was
+ * already set.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
{
- struct crypto_kpp *tfm;
struct kpp_request *req;
- struct ecdh p;
+ u8 *tmp;
struct ecdh_completion result;
struct scatterlist dst;
- u8 *tmp, *buf;
- unsigned int buf_len;
- int err = -ENOMEM;
- const unsigned short max_tries = 16;
- unsigned short tries = 0;
+ int err;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
- return false;
+ return -ENOMEM;
- tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
- if (IS_ERR(tfm)) {
- pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
- PTR_ERR(tfm));
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
goto free_tmp;
}
- req = kpp_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto free_kpp;
-
init_completion(&result.completion);
+ sg_init_one(&dst, tmp, 64);
+ kpp_request_set_input(req, NULL, 0);
+ kpp_request_set_output(req, &dst, 64);
+ kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ ecdh_complete, &result);
- /* Set curve_id */
- p.curve_id = ECC_CURVE_NIST_P256;
- p.key_size = 32;
- buf_len = crypto_ecdh_key_len(&p);
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- goto free_req;
-
- do {
- if (tries++ >= max_tries)
- goto free_all;
-
- /* Set private Key */
- p.key = (char *)private_key;
- crypto_ecdh_encode_key(buf, buf_len, &p);
- err = crypto_kpp_set_secret(tfm, buf, buf_len);
- if (err)
- goto free_all;
-
- sg_init_one(&dst, tmp, 64);
- kpp_request_set_input(req, NULL, 0);
- kpp_request_set_output(req, &dst, 64);
- kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- ecdh_complete, &result);
-
- err = crypto_kpp_generate_public_key(req);
-
- if (err == -EINPROGRESS) {
- wait_for_completion(&result.completion);
- err = result.err;
- }
-
- /* Private key is not valid. Regenerate */
- if (err == -EINVAL)
- continue;
-
- if (err < 0)
- goto free_all;
- else
- break;
-
- } while (true);
-
- /* Keys are handed back in little endian as expected by Security
- * Manager Protocol
+ err = crypto_kpp_generate_public_key(req);
+ if (err == -EINPROGRESS) {
+ wait_for_completion(&result.completion);
+ err = result.err;
+ }
+ if (err < 0)
+ goto free_all;
+
+ /* The public key is handed back in little endian as expected by
+ * the Security Manager Protocol.
*/
swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */
swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */
- swap_digits((u64 *)private_key, (u64 *)tmp, 4);
- memcpy(private_key, tmp, 32);
free_all:
- kzfree(buf);
-free_req:
kpp_request_free(req);
-free_kpp:
- crypto_free_kpp(tfm);
free_tmp:
kfree(tmp);
- return (err == 0);
+ return err;
+}
+
+/* generate_ecdh_keys() - generate ecc key pair.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64])
+{
+ int err;
+
+ err = set_ecdh_privkey(tfm, NULL);
+ if (err)
+ return err;
+
+ return generate_ecdh_public_key(tfm, public_key);
}
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index 7a423fa..a6f8d03 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -20,8 +20,11 @@
* COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
* SOFTWARE IS DISCLAIMED.
*/
+#include <crypto/kpp.h>
#include <linux/types.h>
-bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32],
- u8 secret[32]);
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]);
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
+ u8 secret[32]);
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index dc59eae..a968253 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -729,8 +729,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
goto done;
}
- BT_ERR("HCI request failed to create LE connection: status 0x%2.2x",
- status);
+ bt_dev_err(hdev, "request failed to create LE connection: "
+ "status 0x%2.2x", status);
if (!conn)
goto done;
@@ -907,7 +907,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
*/
if (hci_dev_test_flag(hdev, HCI_LE_SCAN) &&
hdev->le_scan_type == LE_SCAN_ACTIVE) {
- skb_queue_purge(&req.cmd_q);
+ hci_req_purge(&req);
hci_conn_del(conn);
return ERR_PTR(-EBUSY);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 6bc679c..40d260f 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -267,7 +267,7 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt)
amp_init1(req);
break;
default:
- BT_ERR("Unknown device type %d", hdev->dev_type);
+ bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type);
break;
}
@@ -2150,8 +2150,7 @@ static void hci_error_reset(struct work_struct *work)
if (hdev->hw_error)
hdev->hw_error(hdev, hdev->hw_error_code);
else
- BT_ERR("%s hardware error 0x%2.2x", hdev->name,
- hdev->hw_error_code);
+ bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
if (hci_dev_do_close(hdev))
return;
@@ -2524,9 +2523,9 @@ static void hci_cmd_timeout(struct work_struct *work)
struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
u16 opcode = __le16_to_cpu(sent->opcode);
- BT_ERR("%s command 0x%4.4x tx timeout", hdev->name, opcode);
+ bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode);
} else {
- BT_ERR("%s command tx timeout", hdev->name);
+ bt_dev_err(hdev, "command tx timeout");
}
atomic_set(&hdev->cmd_cnt, 1);
@@ -2858,7 +2857,7 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params) {
- BT_ERR("Out of memory");
+ bt_dev_err(hdev, "out of memory");
return NULL;
}
@@ -3393,7 +3392,7 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
err = hdev->send(hdev, skb);
if (err < 0) {
- BT_ERR("%s sending frame failed (%d)", hdev->name, err);
+ bt_dev_err(hdev, "sending frame failed (%d)", err);
kfree_skb(skb);
}
}
@@ -3408,7 +3407,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
skb = hci_prepare_cmd(hdev, opcode, plen, param);
if (!skb) {
- BT_ERR("%s no memory for command", hdev->name);
+ bt_dev_err(hdev, "no memory for command");
return -ENOMEM;
}
@@ -3493,7 +3492,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue,
hci_add_acl_hdr(skb, chan->handle, flags);
break;
default:
- BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+ bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
return;
}
@@ -3618,7 +3617,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type,
break;
default:
cnt = 0;
- BT_ERR("Unknown link type");
+ bt_dev_err(hdev, "unknown link type %d", conn->type);
}
q = cnt / num;
@@ -3635,15 +3634,15 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
struct hci_conn_hash *h = &hdev->conn_hash;
struct hci_conn *c;
- BT_ERR("%s link tx timeout", hdev->name);
+ bt_dev_err(hdev, "link tx timeout");
rcu_read_lock();
/* Kill stalled connections */
list_for_each_entry_rcu(c, &h->list, list) {
if (c->type == type && c->sent) {
- BT_ERR("%s killing stalled connection %pMR",
- hdev->name, &c->dst);
+ bt_dev_err(hdev, "killing stalled connection %pMR",
+ &c->dst);
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
}
}
@@ -3724,7 +3723,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
break;
default:
cnt = 0;
- BT_ERR("Unknown link type");
+ bt_dev_err(hdev, "unknown link type %d", chan->conn->type);
}
q = cnt / num;
@@ -4066,8 +4065,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
l2cap_recv_acldata(conn, skb, flags);
return;
} else {
- BT_ERR("%s ACL packet for unknown connection handle %d",
- hdev->name, handle);
+ bt_dev_err(hdev, "ACL packet for unknown connection handle %d",
+ handle);
}
kfree_skb(skb);
@@ -4097,8 +4096,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
sco_recv_scodata(conn, skb);
return;
} else {
- BT_ERR("%s SCO packet for unknown connection handle %d",
- hdev->name, handle);
+ bt_dev_err(hdev, "SCO packet for unknown connection handle %d",
+ handle);
}
kfree_skb(skb);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0b4dba0..cd3bbb7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1188,7 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
break;
default:
- BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
+ bt_dev_err(hdev, "use of reserved LE_Scan_Enable param %d",
+ cp->enable);
break;
}
@@ -1485,7 +1486,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr,
HCI_ROLE_MASTER);
if (!conn)
- BT_ERR("No memory for new connection");
+ bt_dev_err(hdev, "no memory for new connection");
}
}
@@ -2269,7 +2270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
HCI_ROLE_SLAVE);
if (!conn) {
- BT_ERR("No memory for new connection");
+ bt_dev_err(hdev, "no memory for new connection");
hci_dev_unlock(hdev);
return;
}
@@ -2431,7 +2432,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!hci_conn_ssp_enabled(conn) &&
test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
- BT_INFO("re-auth of legacy device is not possible.");
+ bt_dev_info(hdev, "re-auth of legacy device is not possible.");
} else {
set_bit(HCI_CONN_AUTH, &conn->flags);
conn->sec_level = conn->pending_sec_level;
@@ -2535,8 +2536,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
BT_DBG("%s status 0x%02x", hdev->name, status);
if (!skb || skb->len < sizeof(*rp)) {
- BT_ERR("%s invalid HCI Read Encryption Key Size response",
- hdev->name);
+ bt_dev_err(hdev, "invalid read key size response");
return;
}
@@ -2554,8 +2554,8 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
* supported.
*/
if (rp->status) {
- BT_ERR("%s failed to read key size for handle %u", hdev->name,
- handle);
+ bt_dev_err(hdev, "failed to read key size for handle %u",
+ handle);
conn->enc_key_size = HCI_LINK_KEY_SIZE;
} else {
conn->enc_key_size = rp->key_size;
@@ -2664,7 +2664,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp);
if (hci_req_run_skb(&req, read_enc_key_size_complete)) {
- BT_ERR("Sending HCI Read Encryption Key Size failed");
+ bt_dev_err(hdev, "sending read key size failed");
conn->enc_key_size = HCI_LINK_KEY_SIZE;
goto notify;
}
@@ -3197,7 +3197,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
int i;
if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
- BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+ bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
return;
}
@@ -3249,7 +3249,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
default:
- BT_ERR("Unknown type %d conn %p", conn->type, conn);
+ bt_dev_err(hdev, "unknown type %d conn %p",
+ conn->type, conn);
break;
}
}
@@ -3271,7 +3272,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev,
return chan->conn;
break;
default:
- BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type);
+ bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type);
break;
}
@@ -3284,7 +3285,7 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
int i;
if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) {
- BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
+ bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode);
return;
}
@@ -3320,7 +3321,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb)
break;
default:
- BT_ERR("Unknown type %d conn %p", conn->type, conn);
+ bt_dev_err(hdev, "unknown type %d conn %p",
+ conn->type, conn);
break;
}
}
@@ -4479,7 +4481,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!conn) {
conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role);
if (!conn) {
- BT_ERR("No memory for new connection");
+ bt_dev_err(hdev, "no memory for new connection");
goto unlock;
}
@@ -4749,8 +4751,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
case LE_ADV_SCAN_RSP:
break;
default:
- BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x",
- type);
+ bt_dev_err_ratelimited(hdev, "unknown advertising packet "
+ "type: 0x%02x", type);
return;
}
@@ -4769,8 +4771,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
/* Adjust for actual length */
if (len != real_len) {
- BT_ERR_RATELIMITED("%s advertising data length corrected",
- hdev->name);
+ bt_dev_err_ratelimited(hdev, "advertising data len corrected");
len = real_len;
}
@@ -5192,7 +5193,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
return false;
if (skb->len < sizeof(*hdr)) {
- BT_ERR("Too short HCI event");
+ bt_dev_err(hdev, "too short HCI event");
return false;
}
@@ -5206,12 +5207,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
}
if (hdr->evt != HCI_EV_CMD_COMPLETE) {
- BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt);
+ bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
+ hdr->evt);
return false;
}
if (skb->len < sizeof(*ev)) {
- BT_ERR("Too short cmd_complete event");
+ bt_dev_err(hdev, "too short cmd_complete event");
return false;
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index b73ac14..abc0f32 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -41,6 +41,11 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
req->err = 0;
}
+void hci_req_purge(struct hci_request *req)
+{
+ skb_queue_purge(&req->cmd_q);
+}
+
static int req_run(struct hci_request *req, hci_req_complete_t complete,
hci_req_complete_skb_t complete_skb)
{
@@ -331,8 +336,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
skb = hci_prepare_cmd(hdev, opcode, plen, param);
if (!skb) {
- BT_ERR("%s no memory for command (opcode 0x%4.4x)",
- hdev->name, opcode);
+ bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)",
+ opcode);
req->err = -ENOMEM;
return;
}
@@ -1421,7 +1426,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
if (err < 0) {
- BT_ERR("%s failed to generate new RPA", hdev->name);
+ bt_dev_err(hdev, "failed to generate new RPA");
return err;
}
@@ -1783,7 +1788,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason)
err = hci_req_run(&req, abort_conn_complete);
if (err && err != -ENODATA) {
- BT_ERR("Failed to run HCI request: err %d", err);
+ bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err);
return err;
}
@@ -1867,7 +1872,8 @@ static void le_scan_disable_work(struct work_struct *work)
hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status);
if (status) {
- BT_ERR("Failed to disable LE scan: status 0x%02x", status);
+ bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x",
+ status);
return;
}
@@ -1898,7 +1904,7 @@ static void le_scan_disable_work(struct work_struct *work)
hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN,
HCI_CMD_TIMEOUT, &status);
if (status) {
- BT_ERR("Inquiry failed: status 0x%02x", status);
+ bt_dev_err(hdev, "inquiry failed: status 0x%02x", status);
goto discov_stopped;
}
@@ -1940,7 +1946,8 @@ static void le_scan_restart_work(struct work_struct *work)
hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status);
if (status) {
- BT_ERR("Failed to restart LE scan: status %d", status);
+ bt_dev_err(hdev, "failed to restart LE scan: status %d",
+ status);
return;
}
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index dde77bd..702beb1 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -36,6 +36,7 @@ struct hci_request {
};
void hci_req_init(struct hci_request *req, struct hci_dev *hdev);
+void hci_req_purge(struct hci_request *req);
int hci_req_run(struct hci_request *req, hci_req_complete_t complete);
int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete);
void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 0bad296..923e9a2 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
}
/* Send frame to sockets with specific channel */
-void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
- int flag, struct sock *skip_sk)
+static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+ int flag, struct sock *skip_sk)
{
struct sock *sk;
BT_DBG("channel %u len %d", channel, skb->len);
- read_lock(&hci_sk_list.lock);
-
sk_for_each(sk, &hci_sk_list.head) {
struct sk_buff *nskb;
@@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
kfree_skb(nskb);
}
+}
+
+void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
+ int flag, struct sock *skip_sk)
+{
+ read_lock(&hci_sk_list.lock);
+ __hci_send_to_channel(channel, skb, flag, skip_sk);
read_unlock(&hci_sk_list.lock);
}
@@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
hdr->index = index;
hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
- hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
- HCI_SOCK_TRUSTED, NULL);
+ __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+ HCI_SOCK_TRUSTED, NULL);
kfree_skb(skb);
}
@@ -878,7 +883,6 @@ static int hci_sock_release(struct socket *sock)
return 0;
}
-#ifdef CONFIG_BT_LEGACY_IOCTL
static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
{
bdaddr_t bdaddr;
@@ -1050,7 +1054,6 @@ done:
release_sock(sk);
return err;
}
-#endif
static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
@@ -1971,11 +1974,7 @@ static const struct proto_ops hci_sock_ops = {
.getname = hci_sock_getname,
.sendmsg = hci_sock_sendmsg,
.recvmsg = hci_sock_recvmsg,
-#ifdef CONFIG_BT_LEGACY_IOCTL
.ioctl = hci_sock_ioctl,
-#else
- .ioctl = sock_no_ioctl,
-#endif
.poll = datagram_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index aa300f3..9874844 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Bluetooth HCI driver model support. */
#include <linux/module.h>
@@ -50,7 +51,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
if (device_add(&conn->dev) < 0) {
- BT_ERR("Failed to register connection device");
+ bt_dev_err(hdev, "failed to register connection device");
return;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1fba2a0..6e9fc86 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2159,8 +2159,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
key_count = __le16_to_cpu(cp->key_count);
if (key_count > max_key_count) {
- BT_ERR("load_link_keys: too big key_count value %u",
- key_count);
+ bt_dev_err(hdev, "load_link_keys: too big key_count value %u",
+ key_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -2168,8 +2168,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
expected_len = sizeof(*cp) + key_count *
sizeof(struct mgmt_link_key_info);
if (expected_len != len) {
- BT_ERR("load_link_keys: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -2561,7 +2561,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data,
memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr));
- BT_ERR("PIN code is not 16 bytes long");
+ bt_dev_err(hdev, "PIN code is not 16 bytes long");
err = send_pin_code_neg_reply(sk, hdev, &ncp);
if (err >= 0)
@@ -3391,7 +3391,8 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
MGMT_OP_ADD_REMOTE_OOB_DATA,
status, &cp->addr, sizeof(cp->addr));
} else {
- BT_ERR("add_remote_oob_data: invalid length of %u bytes", len);
+ bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes",
+ len);
err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -3604,8 +3605,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
uuid_count = __le16_to_cpu(cp->uuid_count);
if (uuid_count > max_uuid_count) {
- BT_ERR("service_discovery: too big uuid_count value %u",
- uuid_count);
+ bt_dev_err(hdev, "service_discovery: too big uuid_count value %u",
+ uuid_count);
err = mgmt_cmd_complete(sk, hdev->id,
MGMT_OP_START_SERVICE_DISCOVERY,
MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3615,8 +3616,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
expected_len = sizeof(*cp) + uuid_count * 16;
if (expected_len != len) {
- BT_ERR("service_discovery: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes",
+ expected_len, len);
err = mgmt_cmd_complete(sk, hdev->id,
MGMT_OP_START_SERVICE_DISCOVERY,
MGMT_STATUS_INVALID_PARAMS, &cp->type,
@@ -3943,7 +3944,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status,
err = hci_req_run(&req, enable_advertising_instance);
if (err)
- BT_ERR("Failed to re-configure advertising");
+ bt_dev_err(hdev, "failed to re-configure advertising");
unlock:
hci_dev_unlock(hdev);
@@ -4664,15 +4665,16 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
irk_count = __le16_to_cpu(cp->irk_count);
if (irk_count > max_irk_count) {
- BT_ERR("load_irks: too big irk_count value %u", irk_count);
+ bt_dev_err(hdev, "load_irks: too big irk_count value %u",
+ irk_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
MGMT_STATUS_INVALID_PARAMS);
}
expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info);
if (expected_len != len) {
- BT_ERR("load_irks: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -4745,7 +4747,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
key_count = __le16_to_cpu(cp->key_count);
if (key_count > max_key_count) {
- BT_ERR("load_ltks: too big key_count value %u", key_count);
+ bt_dev_err(hdev, "load_ltks: too big key_count value %u",
+ key_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -4753,8 +4756,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
expected_len = sizeof(*cp) + key_count *
sizeof(struct mgmt_ltk_info);
if (expected_len != len) {
- BT_ERR("load_keys: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -4873,14 +4876,15 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
}
if (!cp) {
- BT_ERR("invalid sent_cmd in conn_info response");
+ bt_dev_err(hdev, "invalid sent_cmd in conn_info response");
goto unlock;
}
handle = __le16_to_cpu(cp->handle);
conn = hci_conn_hash_lookup_handle(hdev, handle);
if (!conn) {
- BT_ERR("unknown handle (%d) in conn_info response", handle);
+ bt_dev_err(hdev, "unknown handle (%d) in conn_info response",
+ handle);
goto unlock;
}
@@ -5477,8 +5481,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
param_count = __le16_to_cpu(cp->param_count);
if (param_count > max_param_count) {
- BT_ERR("load_conn_param: too big param_count value %u",
- param_count);
+ bt_dev_err(hdev, "load_conn_param: too big param_count value %u",
+ param_count);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -5486,8 +5490,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
expected_len = sizeof(*cp) + param_count *
sizeof(struct mgmt_conn_param);
if (expected_len != len) {
- BT_ERR("load_conn_param: expected %u bytes, got %u bytes",
- expected_len, len);
+ bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes",
+ expected_len, len);
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM,
MGMT_STATUS_INVALID_PARAMS);
}
@@ -5512,7 +5516,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
} else if (param->addr.type == BDADDR_LE_RANDOM) {
addr_type = ADDR_LE_DEV_RANDOM;
} else {
- BT_ERR("Ignoring invalid connection parameters");
+ bt_dev_err(hdev, "ignoring invalid connection parameters");
continue;
}
@@ -5525,14 +5529,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
min, max, latency, timeout);
if (hci_check_conn_params(min, max, latency, timeout) < 0) {
- BT_ERR("Ignoring invalid connection parameters");
+ bt_dev_err(hdev, "ignoring invalid connection parameters");
continue;
}
hci_param = hci_conn_params_add(hdev, &param->addr.bdaddr,
addr_type);
if (!hci_param) {
- BT_ERR("Failed to add connection parameters");
+ bt_dev_err(hdev, "failed to add connection parameters");
continue;
}
@@ -6383,6 +6387,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
if (skb_queue_empty(&req.cmd_q) ||
!hdev_is_powered(hdev) ||
hci_dev_test_flag(hdev, HCI_ADVERTISING)) {
+ hci_req_purge(&req);
rp.instance = cp->instance;
err = mgmt_cmd_complete(sk, hdev->id,
MGMT_OP_REMOVE_ADVERTISING,
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index 34a1227..03e3c89 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -138,12 +138,12 @@ static const u8 dhkey_3[32] __initconst = {
0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70,
};
-static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
- const u8 pub_a[64], const u8 pub_b[64],
- const u8 dhkey[32])
+static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32],
+ const u8 priv_b[32], const u8 pub_a[64],
+ const u8 pub_b[64], const u8 dhkey[32])
{
u8 *tmp, *dhkey_a, *dhkey_b;
- int ret = 0;
+ int ret;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
@@ -152,17 +152,30 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
dhkey_a = &tmp[0];
dhkey_b = &tmp[32];
- compute_ecdh_secret(pub_b, priv_a, dhkey_a);
- compute_ecdh_secret(pub_a, priv_b, dhkey_b);
+ ret = set_ecdh_privkey(tfm, priv_a);
+ if (ret)
+ goto out;
+
+ ret = compute_ecdh_secret(tfm, pub_b, dhkey_a);
+ if (ret)
+ goto out;
if (memcmp(dhkey_a, dhkey, 32)) {
ret = -EINVAL;
goto out;
}
+ ret = set_ecdh_privkey(tfm, priv_b);
+ if (ret)
+ goto out;
+
+ ret = compute_ecdh_secret(tfm, pub_a, dhkey_b);
+ if (ret)
+ goto out;
+
if (memcmp(dhkey_b, dhkey, 32))
ret = -EINVAL;
-
+ /* fall through*/
out:
kfree(tmp);
return ret;
@@ -185,30 +198,43 @@ static const struct file_operations test_ecdh_fops = {
static int __init test_ecdh(void)
{
+ struct crypto_kpp *tfm;
ktime_t calltime, delta, rettime;
- unsigned long long duration;
+ unsigned long long duration = 0;
int err;
calltime = ktime_get();
- err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1);
+ tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ err = PTR_ERR(tfm);
+ goto done;
+ }
+
+ err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1,
+ dhkey_1);
if (err) {
BT_ERR("ECDH sample 1 failed");
goto done;
}
- err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2);
+ err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2,
+ dhkey_2);
if (err) {
BT_ERR("ECDH sample 2 failed");
goto done;
}
- err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3);
+ err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3,
+ dhkey_3);
if (err) {
BT_ERR("ECDH sample 3 failed");
goto done;
}
+ crypto_free_kpp(tfm);
+
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a0ef897..01117ae 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -26,6 +26,7 @@
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/hash.h>
+#include <crypto/kpp.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -83,7 +84,6 @@ enum {
struct smp_dev {
/* Secure Connections OOB data */
u8 local_pk[64];
- u8 local_sk[32];
u8 local_rand[16];
bool debug_key;
@@ -92,6 +92,7 @@ struct smp_dev {
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
};
struct smp_chan {
@@ -124,13 +125,13 @@ struct smp_chan {
/* Secure Connections variables */
u8 local_pk[64];
- u8 local_sk[32];
u8 remote_pk[64];
u8 dhkey[32];
u8 mackey[16];
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
};
/* These debug key values are defined in the SMP section of the core
@@ -565,22 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
BT_DBG("Using debug keys");
+ err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk);
+ if (err)
+ return err;
memcpy(smp->local_pk, debug_pk, 64);
- memcpy(smp->local_sk, debug_sk, 32);
smp->debug_key = true;
} else {
while (true) {
- /* Seed private key with random number */
- get_random_bytes(smp->local_sk, 32);
-
- /* Generate local key pair for Secure Connections */
- if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
- return -EIO;
+ /* Generate key pair for Secure Connections */
+ err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk);
+ if (err)
+ return err;
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (crypto_memneq(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
}
smp->debug_key = false;
@@ -588,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
SMP_DBG("OOB Public Key X: %32phN", smp->local_pk);
SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32);
- SMP_DBG("OOB Private Key: %32phN", smp->local_sk);
get_random_bytes(smp->local_rand, 16);
@@ -771,6 +771,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
/* Ensure that we don't leave any debug key around if debug key
* support hasn't been explicitly enabled.
@@ -995,7 +996,8 @@ static u8 smp_random(struct smp_chan *smp)
return SMP_UNSPECIFIED;
if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) {
- BT_ERR("Pairing failed (confirmation values mismatch)");
+ bt_dev_err(hcon->hdev, "pairing failed "
+ "(confirmation values mismatch)");
return SMP_CONFIRM_FAILED;
}
@@ -1209,7 +1211,7 @@ static void sc_generate_ltk(struct smp_chan *smp)
key = hci_find_link_key(hdev, &hcon->dst);
if (!key) {
- BT_ERR("%s No Link Key found to generate LTK", hdev->name);
+ bt_dev_err(hdev, "no Link Key found to generate LTK");
return;
}
@@ -1391,16 +1393,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(smp->tfm_aes)) {
BT_ERR("Unable to create AES crypto context");
- kzfree(smp);
- return NULL;
+ goto zfree_smp;
}
smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
if (IS_ERR(smp->tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_cipher(smp->tfm_aes);
- kzfree(smp);
- return NULL;
+ goto free_cipher;
+ }
+
+ smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(smp->tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ goto free_shash;
}
smp->conn = conn;
@@ -1413,6 +1418,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
hci_conn_hold(conn->hcon);
return smp;
+
+free_shash:
+ crypto_free_shash(smp->tfm_cmac);
+free_cipher:
+ crypto_free_cipher(smp->tfm_aes);
+zfree_smp:
+ kzfree(smp);
+ return NULL;
}
static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16])
@@ -1883,7 +1896,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
smp_dev = chan->data;
memcpy(smp->local_pk, smp_dev->local_pk, 64);
- memcpy(smp->local_sk, smp_dev->local_sk, 32);
memcpy(smp->lr, smp_dev->local_rand, 16);
if (smp_dev->debug_key)
@@ -1894,22 +1906,20 @@ static u8 sc_send_public_key(struct smp_chan *smp)
if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
BT_DBG("Using debug keys");
+ if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk))
+ return SMP_UNSPECIFIED;
memcpy(smp->local_pk, debug_pk, 64);
- memcpy(smp->local_sk, debug_sk, 32);
set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
} else {
while (true) {
- /* Seed private key with random number */
- get_random_bytes(smp->local_sk, 32);
-
- /* Generate local key pair for Secure Connections */
- if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
+ /* Generate key pair for Secure Connections */
+ if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk))
return SMP_UNSPECIFIED;
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (crypto_memneq(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
}
}
@@ -1917,7 +1927,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
done:
SMP_DBG("Local Public Key X: %32phN", smp->local_pk);
SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32);
- SMP_DBG("Local Private Key: %32phN", smp->local_sk);
smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk);
@@ -2059,11 +2068,11 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
return SMP_UNSPECIFIED;
if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) {
- BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode");
+ bt_dev_err(hdev, "refusing legacy fallback in SC-only mode");
return SMP_UNSPECIFIED;
}
- BT_ERR("Trying to fall back to legacy SMP");
+ bt_dev_err(hdev, "trying to fall back to legacy SMP");
req = (void *) &smp->preq[1];
rsp = (void *) &smp->prsp[1];
@@ -2074,7 +2083,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
auth = req->auth_req & AUTH_REQ_MASK(hdev);
if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) {
- BT_ERR("Failed to fall back to legacy SMP");
+ bt_dev_err(hdev, "failed to fall back to legacy SMP");
return SMP_UNSPECIFIED;
}
@@ -2347,7 +2356,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
chan = conn->smp;
if (!chan) {
- BT_ERR("SMP security requested but not available");
+ bt_dev_err(hcon->hdev, "security requested but not available");
return 1;
}
@@ -2540,7 +2549,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn,
*/
if (!bacmp(&info->bdaddr, BDADDR_ANY) ||
!hci_is_identity_address(&info->bdaddr, info->addr_type)) {
- BT_ERR("Ignoring IRK with no identity address");
+ bt_dev_err(hcon->hdev, "ignoring IRK with no identity address");
goto distribute;
}
@@ -2645,6 +2654,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
struct l2cap_chan *chan = conn->smp;
struct smp_chan *smp = chan->data;
struct hci_dev *hdev = hcon->hdev;
+ struct crypto_kpp *tfm_ecdh;
struct smp_cmd_pairing_confirm cfm;
int err;
@@ -2677,7 +2687,18 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk);
SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32);
- if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey))
+ /* Compute the shared secret on the same crypto tfm on which the private
+ * key was set/generated.
+ */
+ if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
+ struct smp_dev *smp_dev = chan->data;
+
+ tfm_ecdh = smp_dev->tfm_ecdh;
+ } else {
+ tfm_ecdh = smp->tfm_ecdh;
+ }
+
+ if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey))
return SMP_UNSPECIFIED;
SMP_DBG("DHKey %32phN", smp->dhkey);
@@ -2933,8 +2954,8 @@ done:
return err;
drop:
- BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name,
- code, &hcon->dst);
+ bt_dev_err(hcon->hdev, "unexpected SMP command 0x%02x from %pMR",
+ code, &hcon->dst);
kfree_skb(skb);
return 0;
}
@@ -3001,8 +3022,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
smp = smp_chan_create(conn);
if (!smp) {
- BT_ERR("%s unable to create SMP context for BR/EDR",
- hdev->name);
+ bt_dev_err(hdev, "unable to create SMP context for BR/EDR");
return;
}
@@ -3169,6 +3189,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
struct smp_dev *smp;
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
if (cid == L2CAP_CID_SMP_BREDR) {
smp = NULL;
@@ -3194,8 +3215,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
return ERR_CAST(tfm_cmac);
}
+ tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ crypto_free_shash(tfm_cmac);
+ crypto_free_cipher(tfm_aes);
+ kzfree(smp);
+ return ERR_CAST(tfm_ecdh);
+ }
+
smp->tfm_aes = tfm_aes;
smp->tfm_cmac = tfm_cmac;
+ smp->tfm_ecdh = tfm_ecdh;
smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
@@ -3205,6 +3236,7 @@ create_chan:
if (smp) {
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
kzfree(smp);
}
return ERR_PTR(-ENOMEM);
@@ -3252,6 +3284,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
chan->data = NULL;
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
kzfree(smp);
}
@@ -3490,25 +3523,18 @@ void smp_unregister(struct hci_dev *hdev)
#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
-static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
+static int __init test_debug_key(struct crypto_kpp *tfm_ecdh)
{
- int i;
-
- for (i = 0; i < ndigits; i++)
- out[i] = __swab64(in[ndigits - 1 - i]);
-}
-
-static int __init test_debug_key(void)
-{
- u8 pk[64], sk[32];
-
- swap_digits((u64 *)debug_sk, (u64 *)sk, 4);
+ u8 pk[64];
+ int err;
- if (!generate_ecdh_keys(pk, sk))
- return -EINVAL;
+ err = set_ecdh_privkey(tfm_ecdh, debug_sk);
+ if (err)
+ return err;
- if (crypto_memneq(sk, debug_sk, 32))
- return -EINVAL;
+ err = generate_ecdh_public_key(tfm_ecdh, pk);
+ if (err)
+ return err;
if (crypto_memneq(pk, debug_pk, 64))
return -EINVAL;
@@ -3763,7 +3789,8 @@ static const struct file_operations test_smp_fops = {
};
static int __init run_selftests(struct crypto_cipher *tfm_aes,
- struct crypto_shash *tfm_cmac)
+ struct crypto_shash *tfm_cmac,
+ struct crypto_kpp *tfm_ecdh)
{
ktime_t calltime, delta, rettime;
unsigned long long duration;
@@ -3771,7 +3798,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes,
calltime = ktime_get();
- err = test_debug_key();
+ err = test_debug_key(tfm_ecdh);
if (err) {
BT_ERR("debug_key test failed");
goto done;
@@ -3848,6 +3875,7 @@ int __init bt_selftest_smp(void)
{
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
int err;
tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
@@ -3863,10 +3891,19 @@ int __init bt_selftest_smp(void)
return PTR_ERR(tfm_cmac);
}
- err = run_selftests(tfm_aes, tfm_cmac);
+ tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ crypto_free_shash(tfm_cmac);
+ crypto_free_cipher(tfm_aes);
+ return PTR_ERR(tfm_ecdh);
+ }
+
+ err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
crypto_free_shash(tfm_cmac);
crypto_free_cipher(tfm_aes);
+ crypto_free_kpp(tfm_ecdh);
return err;
}
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 40b1ede..ac9ef33 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the IEEE 802.1d ethernet bridging layer.
#
@@ -7,7 +8,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
br_ioctl.o br_stp.o br_stp_bpdu.o \
br_stp_if.o br_stp_timer.o br_netlink.o \
- br_netlink_tunnel.o
+ br_netlink_tunnel.o br_arp_nd_proxy.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 1407d1b..6bf06e7 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -112,7 +112,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
/* Events that may cause spanning tree to refresh */
if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
event == NETDEV_CHANGE || event == NETDEV_DOWN)
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
return NOTIFY_DONE;
}
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
new file mode 100644
index 0000000..2cf7716
--- /dev/null
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -0,0 +1,469 @@
+/*
+ * Handle bridge arp/nd proxy/suppress
+ *
+ * Copyright (C) 2017 Cumulus Networks
+ * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * Authors:
+ * Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/neighbour.h>
+#include <net/arp.h>
+#include <linux/if_vlan.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_checksum.h>
+#endif
+
+#include "br_private.h"
+
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ bool neigh_suppress = false;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->flags & BR_NEIGH_SUPPRESS) {
+ neigh_suppress = true;
+ break;
+ }
+ }
+
+ br->neigh_suppress_enabled = neigh_suppress;
+}
+
+#if IS_ENABLED(CONFIG_INET)
+static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
+ struct net_device *dev, __be32 dest_ip, __be32 src_ip,
+ const unsigned char *dest_hw,
+ const unsigned char *src_hw,
+ const unsigned char *target_hw,
+ __be16 vlan_proto, u16 vlan_tci)
+{
+ struct net_bridge_vlan_group *vg;
+ struct sk_buff *skb;
+ u16 pvid;
+
+ netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n",
+ dev->name, &dest_ip, dest_hw, &src_ip, src_hw);
+
+ if (!vlan_tci) {
+ arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+ dest_hw, src_hw, target_hw);
+ return;
+ }
+
+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+ dest_hw, src_hw, target_hw);
+ if (!skb)
+ return;
+
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else
+ vg = br_vlan_group_rcu(br);
+ pvid = br_get_pvid(vg);
+ if (pvid == (vlan_tci & VLAN_VID_MASK))
+ vlan_tci = 0;
+
+ if (vlan_tci)
+ __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
+
+ if (p) {
+ arp_xmit(skb);
+ } else {
+ skb_reset_mac_header(skb);
+ __skb_pull(skb, skb_network_offset(skb));
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->pkt_type = PACKET_HOST;
+
+ netif_rx_ni(skb);
+ }
+}
+
+static int br_chk_addr_ip(struct net_device *dev, void *data)
+{
+ __be32 ip = *(__be32 *)data;
+ struct in_device *in_dev;
+ __be32 addr = 0;
+
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev)
+ addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip,
+ RT_SCOPE_HOST);
+
+ if (addr == ip)
+ return 1;
+
+ return 0;
+}
+
+static bool br_is_local_ip(struct net_device *dev, __be32 ip)
+{
+ if (br_chk_addr_ip(dev, &ip))
+ return true;
+
+ /* check if ip is configured on upper dev */
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip))
+ return true;
+
+ return false;
+}
+
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p)
+{
+ struct net_device *dev = br->dev;
+ struct net_device *vlandev = dev;
+ struct neighbour *n;
+ struct arphdr *parp;
+ u8 *arpptr, *sha;
+ __be32 sip, tip;
+
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+ if ((dev->flags & IFF_NOARP) ||
+ !pskb_may_pull(skb, arp_hdr_len(dev)))
+ return;
+
+ parp = arp_hdr(skb);
+
+ if (parp->ar_pro != htons(ETH_P_IP) ||
+ parp->ar_hln != dev->addr_len ||
+ parp->ar_pln != 4)
+ return;
+
+ arpptr = (u8 *)parp + sizeof(struct arphdr);
+ sha = arpptr;
+ arpptr += dev->addr_len; /* sha */
+ memcpy(&sip, arpptr, sizeof(sip));
+ arpptr += sizeof(sip);
+ arpptr += dev->addr_len; /* tha */
+ memcpy(&tip, arpptr, sizeof(tip));
+
+ if (ipv4_is_loopback(tip) ||
+ ipv4_is_multicast(tip))
+ return;
+
+ if (br->neigh_suppress_enabled) {
+ if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ return;
+ if (ipv4_is_zeronet(sip) || sip == tip) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+ }
+
+ if (parp->ar_op != htons(ARPOP_REQUEST))
+ return;
+
+ if (vid != 0) {
+ vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+ vid);
+ if (!vlandev)
+ return;
+ }
+
+ if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
+ /* its our local ip, so don't proxy reply
+ * and don't forward to neigh suppress ports
+ */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ n = neigh_lookup(&arp_tbl, &tip, vlandev);
+ if (n) {
+ struct net_bridge_fdb_entry *f;
+
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return;
+ }
+
+ f = br_fdb_find_rcu(br, n->ha, vid);
+ if (f) {
+ bool replied = false;
+
+ if ((p && (p->flags & BR_PROXYARP)) ||
+ (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
+ BR_NEIGH_SUPPRESS)))) {
+ if (!vid)
+ br_arp_send(br, p, skb->dev, sip, tip,
+ sha, n->ha, sha, 0, 0);
+ else
+ br_arp_send(br, p, skb->dev, sip, tip,
+ sha, n->ha, sha,
+ skb->vlan_proto,
+ skb_vlan_tag_get(skb));
+ replied = true;
+ }
+
+ /* If we have replied or as long as we know the
+ * mac, indicate to arp replied
+ */
+ if (replied || br->neigh_suppress_enabled)
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ }
+
+ neigh_release(n);
+ }
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg)
+{
+ struct nd_msg *m;
+
+ m = skb_header_pointer(skb, skb_network_offset(skb) +
+ sizeof(struct ipv6hdr), sizeof(*msg), msg);
+ if (!m)
+ return NULL;
+
+ if (m->icmph.icmp6_code != 0 ||
+ (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION &&
+ m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT))
+ return NULL;
+
+ return m;
+}
+
+static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
+ struct sk_buff *request, struct neighbour *n,
+ __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns)
+{
+ struct net_device *dev = request->dev;
+ struct net_bridge_vlan_group *vg;
+ struct sk_buff *reply;
+ struct nd_msg *na;
+ struct ipv6hdr *pip6;
+ int na_olen = 8; /* opt hdr + ETH_ALEN for target */
+ int ns_olen;
+ int i, len;
+ u8 *daddr;
+ u16 pvid;
+
+ if (!dev)
+ return;
+
+ len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
+ sizeof(*na) + na_olen + dev->needed_tailroom;
+
+ reply = alloc_skb(len, GFP_ATOMIC);
+ if (!reply)
+ return;
+
+ reply->protocol = htons(ETH_P_IPV6);
+ reply->dev = dev;
+ skb_reserve(reply, LL_RESERVED_SPACE(dev));
+ skb_push(reply, sizeof(struct ethhdr));
+ skb_set_mac_header(reply, 0);
+
+ daddr = eth_hdr(request)->h_source;
+
+ /* Do we need option processing ? */
+ ns_olen = request->len - (skb_network_offset(request) +
+ sizeof(struct ipv6hdr)) - sizeof(*ns);
+ for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) {
+ if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
+ daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+ break;
+ }
+ }
+
+ /* Ethernet header */
+ ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
+ ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
+ eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
+ reply->protocol = htons(ETH_P_IPV6);
+
+ skb_pull(reply, sizeof(struct ethhdr));
+ skb_set_network_header(reply, 0);
+ skb_put(reply, sizeof(struct ipv6hdr));
+
+ /* IPv6 header */
+ pip6 = ipv6_hdr(reply);
+ memset(pip6, 0, sizeof(struct ipv6hdr));
+ pip6->version = 6;
+ pip6->priority = ipv6_hdr(request)->priority;
+ pip6->nexthdr = IPPROTO_ICMPV6;
+ pip6->hop_limit = 255;
+ pip6->daddr = ipv6_hdr(request)->saddr;
+ pip6->saddr = *(struct in6_addr *)n->primary_key;
+
+ skb_pull(reply, sizeof(struct ipv6hdr));
+ skb_set_transport_header(reply, 0);
+
+ na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
+
+ /* Neighbor Advertisement */
+ memset(na, 0, sizeof(*na) + na_olen);
+ na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+ na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
+ na->icmph.icmp6_override = 1;
+ na->icmph.icmp6_solicited = 1;
+ na->target = ns->target;
+ ether_addr_copy(&na->opt[2], n->ha);
+ na->opt[0] = ND_OPT_TARGET_LL_ADDR;
+ na->opt[1] = na_olen >> 3;
+
+ na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
+ &pip6->daddr,
+ sizeof(*na) + na_olen,
+ IPPROTO_ICMPV6,
+ csum_partial(na, sizeof(*na) + na_olen, 0));
+
+ pip6->payload_len = htons(sizeof(*na) + na_olen);
+
+ skb_push(reply, sizeof(struct ipv6hdr));
+ skb_push(reply, sizeof(struct ethhdr));
+
+ reply->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else
+ vg = br_vlan_group_rcu(br);
+ pvid = br_get_pvid(vg);
+ if (pvid == (vlan_tci & VLAN_VID_MASK))
+ vlan_tci = 0;
+
+ if (vlan_tci)
+ __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci);
+
+ netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n",
+ dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha);
+
+ if (p) {
+ dev_queue_xmit(reply);
+ } else {
+ skb_reset_mac_header(reply);
+ __skb_pull(reply, skb_network_offset(reply));
+ reply->ip_summed = CHECKSUM_UNNECESSARY;
+ reply->pkt_type = PACKET_HOST;
+
+ netif_rx_ni(reply);
+ }
+}
+
+static int br_chk_addr_ip6(struct net_device *dev, void *data)
+{
+ struct in6_addr *addr = (struct in6_addr *)data;
+
+ if (ipv6_chk_addr(dev_net(dev), addr, dev, 0))
+ return 1;
+
+ return 0;
+}
+
+static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
+
+{
+ if (br_chk_addr_ip6(dev, addr))
+ return true;
+
+ /* check if ip is configured on upper dev */
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr))
+ return true;
+
+ return false;
+}
+
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p, struct nd_msg *msg)
+{
+ struct net_device *dev = br->dev;
+ struct net_device *vlandev = NULL;
+ struct in6_addr *saddr, *daddr;
+ struct ipv6hdr *iphdr;
+ struct neighbour *n;
+
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+ if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ return;
+
+ if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
+ !msg->icmph.icmp6_solicited) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
+ return;
+
+ iphdr = ipv6_hdr(skb);
+ saddr = &iphdr->saddr;
+ daddr = &iphdr->daddr;
+
+ if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ if (vid != 0) {
+ /* build neigh table lookup on the vlan device */
+ vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+ vid);
+ if (!vlandev)
+ return;
+ } else {
+ vlandev = dev;
+ }
+
+ if (br_is_local_ip6(vlandev, &msg->target)) {
+ /* its our own ip, so don't proxy reply
+ * and don't forward to arp suppress ports
+ */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev);
+ if (n) {
+ struct net_bridge_fdb_entry *f;
+
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return;
+ }
+
+ f = br_fdb_find_rcu(br, n->ha, vid);
+ if (f) {
+ bool replied = false;
+
+ if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
+ if (vid != 0)
+ br_nd_send(br, p, skb, n,
+ skb->vlan_proto,
+ skb_vlan_tag_get(skb), msg);
+ else
+ br_nd_send(br, p, skb, n, 0, 0, msg);
+ replied = true;
+ }
+
+ /* If we have replied or as long as we know the
+ * mac, indicate to NEIGH_SUPPRESS ports that we
+ * have replied
+ */
+ if (replied || br->neigh_suppress_enabled)
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ }
+ neigh_release(n);
+ }
+}
+#endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f6b6a92..af5b8c8 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
const struct nf_br_ops *nf_ops;
const unsigned char *dest;
+ struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -57,11 +58,30 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
+ if (IS_ENABLED(CONFIG_INET) &&
+ (eth->h_proto == htons(ETH_P_ARP) ||
+ eth->h_proto == htons(ETH_P_RARP)) &&
+ br->neigh_suppress_enabled) {
+ br_do_proxy_suppress_arp(skb, br, vid, NULL);
+ } else if (IS_ENABLED(CONFIG_IPV6) &&
+ skb->protocol == htons(ETH_P_IPV6) &&
+ br->neigh_suppress_enabled &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)) &&
+ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+ struct nd_msg *msg, _msg;
+
+ msg = br_is_nd_neigh_msg(skb, &_msg);
+ if (msg)
+ br_do_suppress_nd(skb, br, vid, NULL, msg);
+ }
+
dest = eth_hdr(skb)->h_dest;
if (is_broadcast_ether_addr(dest)) {
br_flood(br, skb, BR_PKT_BROADCAST, false, true);
@@ -320,12 +340,13 @@ void br_netpoll_disable(struct net_bridge_port *p)
#endif
-static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
+static int br_add_slave(struct net_device *dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge *br = netdev_priv(dev);
- return br_add_if(br, slave_dev);
+ return br_add_if(br, slave_dev, extack);
}
static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
@@ -400,7 +421,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_id.prio[0] = 0x80;
br->bridge_id.prio[1] = 0x00;
- ether_addr_copy(br->group_addr, eth_reserved_addr_base);
+ ether_addr_copy(br->group_addr, eth_stp_addr);
br->stp_enabled = BR_NO_STP;
br->group_fwd_mask = BR_GROUPFWD_DEFAULT;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 48fb174..b4eed11 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
continue;
- if ((p->flags & BR_PROXYARP_WIFI) &&
+ if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
BR_INPUT_SKB_CB(skb)->proxyarp_replied)
continue;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22..9ba4ed6 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -271,7 +271,7 @@ static void del_nbp(struct net_bridge_port *p)
br_stp_disable_port(p);
spin_unlock_bh(&br->lock);
- br_ifinfo_notify(RTM_DELLINK, p);
+ br_ifinfo_notify(RTM_DELLINK, NULL, p);
list_del_rcu(&p->list);
if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
@@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
del_nbp(p);
}
+ br_recalculate_neigh_suppress_enabled(br);
+
br_fdb_delete_by_port(br, NULL, 0, 1);
cancel_delayed_work_sync(&br->gc_work);
@@ -480,7 +482,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
}
/* called with RTNL */
-int br_add_if(struct net_bridge *br, struct net_device *dev)
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port *p;
int err = 0;
@@ -500,16 +503,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
return -EINVAL;
/* No bridging of bridges */
- if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
+ if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
+ NL_SET_ERR_MSG(extack,
+ "Can not enslave a bridge to a bridge");
return -ELOOP;
+ }
/* Device is already being bridged */
if (br_port_exists(dev))
return -EBUSY;
/* No bridging devices that dislike that (e.g. wireless) */
- if (dev->priv_flags & IFF_DONT_BRIDGE)
+ if (dev->priv_flags & IFF_DONT_BRIDGE) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not allow enslaving to a bridge");
return -EOPNOTSUPP;
+ }
p = new_nbp(br, dev);
if (IS_ERR(p))
@@ -540,7 +549,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev->priv_flags |= IFF_BRIDGE_PORT;
- err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
+ err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
if (err)
goto err5;
@@ -580,7 +589,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);
@@ -653,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
if (mask & BR_AUTO_MASK)
nbp_update_port_count(br);
+
+ if (mask & BR_NEIGH_SUPPRESS)
+ br_recalculate_neigh_suppress_enabled(br);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7cb6137..a096d3e 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb)
br_netif_receive_skb);
}
-static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
- u16 vid, struct net_bridge_port *p)
-{
- struct net_device *dev = br->dev;
- struct neighbour *n;
- struct arphdr *parp;
- u8 *arpptr, *sha;
- __be32 sip, tip;
-
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
-
- if ((dev->flags & IFF_NOARP) ||
- !pskb_may_pull(skb, arp_hdr_len(dev)))
- return;
-
- parp = arp_hdr(skb);
-
- if (parp->ar_pro != htons(ETH_P_IP) ||
- parp->ar_op != htons(ARPOP_REQUEST) ||
- parp->ar_hln != dev->addr_len ||
- parp->ar_pln != 4)
- return;
-
- arpptr = (u8 *)parp + sizeof(struct arphdr);
- sha = arpptr;
- arpptr += dev->addr_len; /* sha */
- memcpy(&sip, arpptr, sizeof(sip));
- arpptr += sizeof(sip);
- arpptr += dev->addr_len; /* tha */
- memcpy(&tip, arpptr, sizeof(tip));
-
- if (ipv4_is_loopback(tip) ||
- ipv4_is_multicast(tip))
- return;
-
- n = neigh_lookup(&arp_tbl, &tip, dev);
- if (n) {
- struct net_bridge_fdb_entry *f;
-
- if (!(n->nud_state & NUD_VALID)) {
- neigh_release(n);
- return;
- }
-
- f = br_fdb_find_rcu(br, n->ha, vid);
- if (f && ((p->flags & BR_PROXYARP) ||
- (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
- arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
- sha, n->ha, sha);
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
- }
-
- neigh_release(n);
- }
-}
-
/* note: already called with rcu_read_lock */
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -171,8 +115,22 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
- if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
- br_do_proxy_arp(skb, br, vid, p);
+ if (IS_ENABLED(CONFIG_INET) &&
+ (skb->protocol == htons(ETH_P_ARP) ||
+ skb->protocol == htons(ETH_P_RARP))) {
+ br_do_proxy_suppress_arp(skb, br, vid, p);
+ } else if (IS_ENABLED(CONFIG_IPV6) &&
+ skb->protocol == htons(ETH_P_IPV6) &&
+ br->neigh_suppress_enabled &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)) &&
+ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+ struct nd_msg *msg, _msg;
+
+ msg = br_is_nd_neigh_msg(skb, &_msg);
+ if (msg)
+ br_do_suppress_nd(skb, br, vid, p, msg);
+ }
switch (pkt_type) {
case BR_PKT_MULTICAST:
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 66cd987..73b957f 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -98,13 +98,10 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
return -EINVAL;
if (isadd)
- ret = br_add_if(br, dev);
+ ret = br_add_if(br, dev, NULL);
else
ret = br_del_if(br, dev);
- if (!ret)
- rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL);
-
return ret;
}
@@ -296,7 +293,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!ret) {
if (p)
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
else
netdev_state_change(br->dev);
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ca01def..31ddff2 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/err.h>
#include <linux/igmp.h>
#include <linux/kernel.h>
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8dc5c8d..5f7f0e9 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -239,9 +239,9 @@ static void br_multicast_free_group(struct rcu_head *head)
kfree(mp);
}
-static void br_multicast_group_expired(unsigned long data)
+static void br_multicast_group_expired(struct timer_list *t)
{
- struct net_bridge_mdb_entry *mp = (void *)data;
+ struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
struct net_bridge *br = mp->br;
struct net_bridge_mdb_htable *mdb;
@@ -302,9 +302,9 @@ static void br_multicast_del_pg(struct net_bridge *br,
WARN_ON(1);
}
-static void br_multicast_port_group_expired(unsigned long data)
+static void br_multicast_port_group_expired(struct timer_list *t)
{
- struct net_bridge_port_group *pg = (void *)data;
+ struct net_bridge_port_group *pg = from_timer(pg, t, timer);
struct net_bridge *br = pg->port->br;
spin_lock(&br->multicast_lock);
@@ -701,8 +701,7 @@ rehash:
mp->br = br;
mp->addr = *group;
- setup_timer(&mp->timer, br_multicast_group_expired,
- (unsigned long)mp);
+ timer_setup(&mp->timer, br_multicast_group_expired, 0);
hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
mdb->size++;
@@ -729,8 +728,7 @@ struct net_bridge_port_group *br_multicast_new_port_group(
p->flags = flags;
rcu_assign_pointer(p->next, next);
hlist_add_head(&p->mglist, &port->mglist);
- setup_timer(&p->timer, br_multicast_port_group_expired,
- (unsigned long)p);
+ timer_setup(&p->timer, br_multicast_port_group_expired, 0);
if (src)
memcpy(p->eth_addr, src, ETH_ALEN);
@@ -843,9 +841,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
}
#endif
-static void br_multicast_router_expired(unsigned long data)
+static void br_multicast_router_expired(struct timer_list *t)
{
- struct net_bridge_port *port = (void *)data;
+ struct net_bridge_port *port =
+ from_timer(port, t, multicast_router_timer);
struct net_bridge *br = port->br;
spin_lock(&br->multicast_lock);
@@ -859,8 +858,32 @@ out:
spin_unlock(&br->multicast_lock);
}
-static void br_multicast_local_router_expired(unsigned long data)
+static void br_mc_router_state_change(struct net_bridge *p,
+ bool is_mc_router)
{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
+ .flags = SWITCHDEV_F_DEFER,
+ .u.mrouter = is_mc_router,
+ };
+
+ switchdev_port_attr_set(p->dev, &attr);
+}
+
+static void br_multicast_local_router_expired(struct timer_list *t)
+{
+ struct net_bridge *br = from_timer(br, t, multicast_router_timer);
+
+ spin_lock(&br->multicast_lock);
+ if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ br->multicast_router == MDB_RTR_TYPE_PERM ||
+ timer_pending(&br->multicast_router_timer))
+ goto out;
+
+ br_mc_router_state_change(br, false);
+out:
+ spin_unlock(&br->multicast_lock);
}
static void br_multicast_querier_expired(struct net_bridge *br,
@@ -876,17 +899,17 @@ out:
spin_unlock(&br->multicast_lock);
}
-static void br_ip4_multicast_querier_expired(unsigned long data)
+static void br_ip4_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
br_multicast_querier_expired(br, &br->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_querier_expired(unsigned long data)
+static void br_ip6_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
br_multicast_querier_expired(br, &br->ip6_own_query);
}
@@ -987,17 +1010,17 @@ out:
spin_unlock(&br->multicast_lock);
}
-static void br_ip4_multicast_port_query_expired(unsigned long data)
+static void br_ip4_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = (void *)data;
+ struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
br_multicast_port_query_expired(port, &port->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_port_query_expired(unsigned long data)
+static void br_ip6_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = (void *)data;
+ struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
br_multicast_port_query_expired(port, &port->ip6_own_query);
}
@@ -1019,13 +1042,13 @@ int br_multicast_add_port(struct net_bridge_port *port)
{
port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- setup_timer(&port->multicast_router_timer, br_multicast_router_expired,
- (unsigned long)port);
- setup_timer(&port->ip4_own_query.timer,
- br_ip4_multicast_port_query_expired, (unsigned long)port);
+ timer_setup(&port->multicast_router_timer,
+ br_multicast_router_expired, 0);
+ timer_setup(&port->ip4_own_query.timer,
+ br_ip4_multicast_port_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&port->ip6_own_query.timer,
- br_ip6_multicast_port_query_expired, (unsigned long)port);
+ timer_setup(&port->ip6_own_query.timer,
+ br_ip6_multicast_port_query_expired, 0);
#endif
br_mc_disabled_update(port->dev, port->br->multicast_disabled);
@@ -1364,9 +1387,12 @@ static void br_multicast_mark_router(struct net_bridge *br,
unsigned long now = jiffies;
if (!port) {
- if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
+ if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+ if (!timer_pending(&br->multicast_router_timer))
+ br_mc_router_state_change(br, true);
mod_timer(&br->multicast_router_timer,
now + br->multicast_querier_interval);
+ }
return;
}
@@ -1906,17 +1932,17 @@ static void br_multicast_query_expired(struct net_bridge *br,
spin_unlock(&br->multicast_lock);
}
-static void br_ip4_multicast_query_expired(unsigned long data)
+static void br_ip4_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_query_expired(unsigned long data)
+static void br_ip6_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = (void *)data;
+ struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
}
@@ -1951,17 +1977,17 @@ void br_multicast_init(struct net_bridge *br)
br->has_ipv6_addr = 1;
spin_lock_init(&br->multicast_lock);
- setup_timer(&br->multicast_router_timer,
+ timer_setup(&br->multicast_router_timer,
br_multicast_local_router_expired, 0);
- setup_timer(&br->ip4_other_query.timer,
- br_ip4_multicast_querier_expired, (unsigned long)br);
- setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
- (unsigned long)br);
+ timer_setup(&br->ip4_other_query.timer,
+ br_ip4_multicast_querier_expired, 0);
+ timer_setup(&br->ip4_own_query.timer,
+ br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- setup_timer(&br->ip6_other_query.timer,
- br_ip6_multicast_querier_expired, (unsigned long)br);
- setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired,
- (unsigned long)br);
+ timer_setup(&br->ip6_other_query.timer,
+ br_ip6_multicast_querier_expired, 0);
+ timer_setup(&br->ip6_own_query.timer,
+ br_ip6_multicast_query_expired, 0);
#endif
}
@@ -2042,9 +2068,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
switch (val) {
case MDB_RTR_TYPE_DISABLED:
case MDB_RTR_TYPE_PERM:
+ br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
del_timer(&br->multicast_router_timer);
- /* fall through */
+ br->multicast_router = val;
+ err = 0;
+ break;
case MDB_RTR_TYPE_TEMP_QUERY:
+ if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+ br_mc_router_state_change(br, false);
br->multicast_router = val;
err = 0;
break;
@@ -2184,6 +2215,18 @@ bool br_multicast_enabled(const struct net_device *dev)
}
EXPORT_SYMBOL_GPL(br_multicast_enabled);
+bool br_multicast_router(const struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ bool is_router;
+
+ spin_lock_bh(&br->multicast_lock);
+ is_router = br_multicast_is_router(br);
+ spin_unlock_bh(&br->multicast_lock);
+ return is_router;
+}
+EXPORT_SYMBOL_GPL(br_multicast_router);
+
int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
{
unsigned long max_delay;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index dea88a2..67bae0f 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
+ + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -210,7 +211,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
BR_VLAN_TUNNEL)) ||
- nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask))
+ nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
+ nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
+ !!(p->flags & BR_NEIGH_SUPPRESS)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -358,14 +361,14 @@ nla_put_failure:
* Contains port and master info as well as carrier and bridge state.
*/
static int br_fill_ifinfo(struct sk_buff *skb,
- struct net_bridge_port *port,
+ const struct net_bridge_port *port,
u32 pid, u32 seq, int event, unsigned int flags,
u32 filter_mask, const struct net_device *dev)
{
+ u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
struct net_bridge *br;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
- u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
if (port)
br = port->br;
@@ -451,28 +454,36 @@ nla_put_failure:
return -EMSGSIZE;
}
-/*
- * Notify listeners of a change in port information
- */
-void br_ifinfo_notify(int event, struct net_bridge_port *port)
+/* Notify listeners of a change in bridge or port information */
+void br_ifinfo_notify(int event, const struct net_bridge *br,
+ const struct net_bridge_port *port)
{
- struct net *net;
+ u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
+ struct net_device *dev;
struct sk_buff *skb;
int err = -ENOBUFS;
- u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED;
+ struct net *net;
+ u16 port_no = 0;
- if (!port)
+ if (WARN_ON(!port && !br))
return;
- net = dev_net(port->dev);
- br_debug(port->br, "port %u(%s) event %d\n",
- (unsigned int)port->port_no, port->dev->name, event);
+ if (port) {
+ dev = port->dev;
+ br = port->br;
+ port_no = port->port_no;
+ } else {
+ dev = br->dev;
+ }
- skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC);
+ net = dev_net(dev);
+ br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event);
+
+ skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC);
if (skb == NULL)
goto errout;
- err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev);
+ err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev);
if (err < 0) {
/* -EMSGSIZE implies BUG in br_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -485,7 +496,6 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_LINK, err);
}
-
/*
* Dump information about all ports, in response to GETLINK
*/
@@ -503,8 +513,9 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
}
static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
- int cmd, struct bridge_vlan_info *vinfo)
+ int cmd, struct bridge_vlan_info *vinfo, bool *changed)
{
+ bool curr_change;
int err = 0;
switch (cmd) {
@@ -513,22 +524,27 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
/* if the MASTER flag is set this will act on the global
* per-VLAN entry as well
*/
- err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
- if (err)
- break;
+ err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
+ &curr_change);
} else {
vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
- err = br_vlan_add(br, vinfo->vid, vinfo->flags);
+ err = br_vlan_add(br, vinfo->vid, vinfo->flags,
+ &curr_change);
}
+ if (curr_change)
+ *changed = true;
break;
case RTM_DELLINK:
if (p) {
- nbp_vlan_delete(p, vinfo->vid);
- if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
- br_vlan_delete(p->br, vinfo->vid);
- } else {
- br_vlan_delete(br, vinfo->vid);
+ if (!nbp_vlan_delete(p, vinfo->vid))
+ *changed = true;
+
+ if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) &&
+ !br_vlan_delete(p->br, vinfo->vid))
+ *changed = true;
+ } else if (!br_vlan_delete(br, vinfo->vid)) {
+ *changed = true;
}
break;
}
@@ -539,7 +555,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
static int br_process_vlan_info(struct net_bridge *br,
struct net_bridge_port *p, int cmd,
struct bridge_vlan_info *vinfo_curr,
- struct bridge_vlan_info **vinfo_last)
+ struct bridge_vlan_info **vinfo_last,
+ bool *changed)
{
if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
return -EINVAL;
@@ -569,22 +586,22 @@ static int br_process_vlan_info(struct net_bridge *br,
sizeof(struct bridge_vlan_info));
for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
tmp_vinfo.vid = v;
- err = br_vlan_info(br, p, cmd, &tmp_vinfo);
+ err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
if (err)
break;
}
*vinfo_last = NULL;
- return 0;
+ return err;
}
- return br_vlan_info(br, p, cmd, vinfo_curr);
+ return br_vlan_info(br, p, cmd, vinfo_curr, changed);
}
static int br_afspec(struct net_bridge *br,
struct net_bridge_port *p,
struct nlattr *af_spec,
- int cmd)
+ int cmd, bool *changed)
{
struct bridge_vlan_info *vinfo_curr = NULL;
struct bridge_vlan_info *vinfo_last = NULL;
@@ -604,7 +621,8 @@ static int br_afspec(struct net_bridge *br,
return err;
err = br_process_vlan_tunnel_info(br, p, cmd,
&tinfo_curr,
- &tinfo_last);
+ &tinfo_last,
+ changed);
if (err)
return err;
break;
@@ -613,7 +631,7 @@ static int br_afspec(struct net_bridge *br,
return -EINVAL;
vinfo_curr = nla_data(attr);
err = br_process_vlan_info(br, p, cmd, vinfo_curr,
- &vinfo_last);
+ &vinfo_last, changed);
if (err)
return err;
break;
@@ -640,6 +658,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
[IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
+ [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 },
};
/* Change the state of the port and notify spanning tree */
@@ -785,6 +804,11 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
p->group_fwd_mask = fwd_mask;
}
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
+ BR_NEIGH_SUPPRESS);
+ if (err)
+ return err;
+
br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
@@ -792,10 +816,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
/* Change state and parameters on port. */
int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
{
+ struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
+ struct nlattr *tb[IFLA_BRPORT_MAX + 1];
+ struct net_bridge_port *p;
struct nlattr *protinfo;
struct nlattr *afspec;
- struct net_bridge_port *p;
- struct nlattr *tb[IFLA_BRPORT_MAX + 1];
+ bool changed = false;
int err = 0;
protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
@@ -831,15 +857,14 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
}
if (err)
goto out;
+ changed = true;
}
- if (afspec) {
- err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
- afspec, RTM_SETLINK);
- }
+ if (afspec)
+ err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
- if (err == 0)
- br_ifinfo_notify(RTM_NEWLINK, p);
+ if (changed)
+ br_ifinfo_notify(RTM_NEWLINK, br, p);
out:
return err;
}
@@ -847,8 +872,10 @@ out:
/* Delete port information */
int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
{
- struct nlattr *afspec;
+ struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
struct net_bridge_port *p;
+ struct nlattr *afspec;
+ bool changed = false;
int err = 0;
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -860,13 +887,12 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
if (!p && !(dev->priv_flags & IFF_EBRIDGE))
return -EINVAL;
- err = br_afspec((struct net_bridge *)netdev_priv(dev), p,
- afspec, RTM_DELLINK);
- if (err == 0)
+ err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
+ if (changed)
/* Send RTM_NEWLINK because userspace
* expects RTM_NEWLINK for vlan dels
*/
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, br, p);
return err;
}
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 3712c7f..da8cb99 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX +
};
static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
- u16 vid, u32 tun_id)
+ u16 vid, u32 tun_id, bool *changed)
{
int err = 0;
@@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd,
switch (cmd) {
case RTM_SETLINK:
err = nbp_vlan_tunnel_info_add(p, vid, tun_id);
+ if (!err)
+ *changed = true;
break;
case RTM_DELLINK:
- nbp_vlan_tunnel_info_delete(p, vid);
+ if (!nbp_vlan_tunnel_info_delete(p, vid))
+ *changed = true;
break;
}
@@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr,
int br_process_vlan_tunnel_info(struct net_bridge *br,
struct net_bridge_port *p, int cmd,
struct vtunnel_info *tinfo_curr,
- struct vtunnel_info *tinfo_last)
+ struct vtunnel_info *tinfo_last,
+ bool *changed)
{
int err;
@@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
return -EINVAL;
t = tinfo_last->tunid;
for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) {
- err = br_vlan_tunnel_info(p, cmd, v, t);
+ err = br_vlan_tunnel_info(p, cmd, v, t, changed);
if (err)
return err;
t++;
@@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
if (tinfo_last->flags)
return -EINVAL;
err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid,
- tinfo_curr->tunid);
+ tinfo_curr->tunid, changed);
if (err)
return err;
memset(tinfo_last, 0, sizeof(struct vtunnel_info));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 020c709..40553d8 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -404,6 +404,7 @@ struct net_bridge {
#ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
+ bool neigh_suppress_enabled;
};
struct br_input_skb_cb {
@@ -566,7 +567,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
void br_port_carrier_check(struct net_bridge_port *p);
int br_add_bridge(struct net *net, const char *name);
int br_del_bridge(struct net *net, const char *name);
-int br_add_if(struct net_bridge *br, struct net_device *dev);
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
int br_min_mtu(const struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
@@ -801,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
const struct net_bridge_port *port,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb);
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
+ bool *changed);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -814,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
-int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+ bool *changed);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
int nbp_vlan_init(struct net_bridge_port *port);
@@ -901,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
return skb;
}
-static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
+ bool *changed)
{
+ *changed = false;
return -EOPNOTSUPP;
}
@@ -924,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br)
return 0;
}
-static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+ bool *changed)
{
+ *changed = false;
return -EOPNOTSUPP;
}
@@ -1063,7 +1071,8 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr)
extern struct rtnl_link_ops br_link_ops;
int br_netlink_init(void);
void br_netlink_fini(void);
-void br_ifinfo_notify(int event, struct net_bridge_port *port);
+void br_ifinfo_notify(int event, const struct net_bridge *br,
+ const struct net_bridge_port *port);
int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
@@ -1138,4 +1147,11 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
}
#endif /* CONFIG_NET_SWITCHDEV */
+/* br_arp_nd_proxy.c */
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br);
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p);
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
#endif
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index 4a447a3..a259471 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br,
struct net_bridge_port *p,
int cmd,
struct vtunnel_info *tinfo_curr,
- struct vtunnel_info *tinfo_last);
+ struct vtunnel_info *tinfo_last,
+ bool *changed);
int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg);
int br_fill_vlan_tunnel_info(struct sk_buff *skb,
struct net_bridge_vlan_group *vg);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 8f56c2d..b694196 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -123,7 +123,7 @@ static void br_root_port_block(const struct net_bridge *br,
(unsigned int) p->port_no, p->dev->name);
br_set_state(p, BR_STATE_LISTENING);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
if (br->forward_delay > 0)
mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
@@ -403,7 +403,7 @@ static void br_make_blocking(struct net_bridge_port *p)
br_topology_change_detection(p->br);
br_set_state(p, BR_STATE_BLOCKING);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
del_timer(&p->forward_delay_timer);
}
@@ -426,7 +426,7 @@ static void br_make_forwarding(struct net_bridge_port *p)
else
br_set_state(p, BR_STATE_LEARNING);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
if (br->forward_delay != 0)
mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 8911031..808e2b9 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -96,7 +96,7 @@ void br_stp_enable_port(struct net_bridge_port *p)
{
br_init_port(p);
br_port_state_selection(p->br);
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
}
/* called under bridge lock */
@@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p)
p->topology_change_ack = 0;
p->config_pending = 0;
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
del_timer(&p->message_age_timer);
del_timer(&p->forward_delay_timer);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 60b6fe2..e7739de 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -31,9 +31,9 @@ static int br_is_designated_for_some_port(const struct net_bridge *br)
return 0;
}
-static void br_hello_timer_expired(unsigned long arg)
+static void br_hello_timer_expired(struct timer_list *t)
{
- struct net_bridge *br = (struct net_bridge *)arg;
+ struct net_bridge *br = from_timer(br, t, hello_timer);
br_debug(br, "hello timer expired\n");
spin_lock(&br->lock);
@@ -47,9 +47,9 @@ static void br_hello_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_message_age_timer_expired(unsigned long arg)
+static void br_message_age_timer_expired(struct timer_list *t)
{
- struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge_port *p = from_timer(p, t, message_age_timer);
struct net_bridge *br = p->br;
const bridge_id *id = &p->designated_bridge;
int was_root;
@@ -80,9 +80,9 @@ static void br_message_age_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_forward_delay_timer_expired(unsigned long arg)
+static void br_forward_delay_timer_expired(struct timer_list *t)
{
- struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge_port *p = from_timer(p, t, forward_delay_timer);
struct net_bridge *br = p->br;
br_debug(br, "port %u(%s) forward delay timer\n",
@@ -99,14 +99,14 @@ static void br_forward_delay_timer_expired(unsigned long arg)
netif_carrier_on(br->dev);
}
rcu_read_lock();
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
rcu_read_unlock();
spin_unlock(&br->lock);
}
-static void br_tcn_timer_expired(unsigned long arg)
+static void br_tcn_timer_expired(struct timer_list *t)
{
- struct net_bridge *br = (struct net_bridge *) arg;
+ struct net_bridge *br = from_timer(br, t, tcn_timer);
br_debug(br, "tcn timer expired\n");
spin_lock(&br->lock);
@@ -118,9 +118,9 @@ static void br_tcn_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_topology_change_timer_expired(unsigned long arg)
+static void br_topology_change_timer_expired(struct timer_list *t)
{
- struct net_bridge *br = (struct net_bridge *) arg;
+ struct net_bridge *br = from_timer(br, t, topology_change_timer);
br_debug(br, "topo change timer expired\n");
spin_lock(&br->lock);
@@ -129,9 +129,9 @@ static void br_topology_change_timer_expired(unsigned long arg)
spin_unlock(&br->lock);
}
-static void br_hold_timer_expired(unsigned long arg)
+static void br_hold_timer_expired(struct timer_list *t)
{
- struct net_bridge_port *p = (struct net_bridge_port *) arg;
+ struct net_bridge_port *p = from_timer(p, t, hold_timer);
br_debug(p->br, "port %u(%s) hold timer expired\n",
(unsigned int) p->port_no, p->dev->name);
@@ -144,27 +144,17 @@ static void br_hold_timer_expired(unsigned long arg)
void br_stp_timer_init(struct net_bridge *br)
{
- setup_timer(&br->hello_timer, br_hello_timer_expired,
- (unsigned long) br);
-
- setup_timer(&br->tcn_timer, br_tcn_timer_expired,
- (unsigned long) br);
-
- setup_timer(&br->topology_change_timer,
- br_topology_change_timer_expired,
- (unsigned long) br);
+ timer_setup(&br->hello_timer, br_hello_timer_expired, 0);
+ timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0);
+ timer_setup(&br->topology_change_timer,
+ br_topology_change_timer_expired, 0);
}
void br_stp_port_timer_init(struct net_bridge_port *p)
{
- setup_timer(&p->message_age_timer, br_message_age_timer_expired,
- (unsigned long) p);
-
- setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired,
- (unsigned long) p);
-
- setup_timer(&p->hold_timer, br_hold_timer_expired,
- (unsigned long) p);
+ timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0);
+ timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0);
+ timer_setup(&p->hold_timer, br_hold_timer_expired, 0);
}
/* Report ticks left (in USER_HZ) used for API */
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index f6b1c7d..9700e0f 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/netdevice.h>
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 9110d5e..0254c35 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -191,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
+BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -241,6 +242,7 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_multicast_flood,
&brport_attr_broadcast_flood,
&brport_attr_group_fwd_mask,
+ &brport_attr_neigh_suppress,
NULL
};
@@ -278,7 +280,7 @@ static ssize_t brport_store(struct kobject *kobj,
ret = brport_attr->store(p, val);
spin_unlock_bh(&p->br->lock);
if (!ret) {
- br_ifinfo_notify(RTM_NEWLINK, p);
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
ret = count;
}
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 233a300..5193527 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
}
-static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
if (vg->pvid == vid)
- return;
+ return false;
smp_wmb();
vg->pvid = vid;
+
+ return true;
}
-static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
if (vg->pvid != vid)
- return;
+ return false;
smp_wmb();
vg->pvid = 0;
+
+ return true;
}
-static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
+/* return true if anything changed, false otherwise */
+static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
{
struct net_bridge_vlan_group *vg;
+ u16 old_flags = v->flags;
+ bool ret;
if (br_vlan_is_master(v))
vg = br_vlan_group(v->br);
@@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
vg = nbp_vlan_group(v->port);
if (flags & BRIDGE_VLAN_INFO_PVID)
- __vlan_add_pvid(vg, v->vid);
+ ret = __vlan_add_pvid(vg, v->vid);
else
- __vlan_delete_pvid(vg, v->vid);
+ ret = __vlan_delete_pvid(vg, v->vid);
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
else
v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
+
+ return ret || !!(old_flags ^ v->flags);
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
@@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
vg = br_vlan_group(br);
masterv = br_vlan_find(vg, vid);
if (!masterv) {
+ bool changed;
+
/* missing global ctx, create it now */
- if (br_vlan_add(br, vid, 0))
+ if (br_vlan_add(br, vid, 0, &changed))
return NULL;
masterv = br_vlan_find(vg, vid);
if (WARN_ON(!masterv))
@@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
/* need to work on the master vlan too */
if (flags & BRIDGE_VLAN_INFO_MASTER) {
- err = br_vlan_add(br, v->vid, flags |
- BRIDGE_VLAN_INFO_BRENTRY);
+ bool changed;
+
+ err = br_vlan_add(br, v->vid,
+ flags | BRIDGE_VLAN_INFO_BRENTRY,
+ &changed);
if (err)
goto out_filt;
}
@@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
+ * changed must be true only if the vlan was created or updated
*/
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *vlan;
@@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
ASSERT_RTNL();
+ *changed = false;
vg = br_vlan_group(br);
vlan = br_vlan_find(vg, vid);
if (vlan) {
@@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
refcount_inc(&vlan->refcnt);
vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
vg->num_vlans++;
+ *changed = true;
}
- __vlan_add_flags(vlan, flags);
+ if (__vlan_add_flags(vlan, flags))
+ *changed = true;
+
return 0;
}
@@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
if (ret) {
free_percpu(vlan->stats);
kfree(vlan);
+ } else {
+ *changed = true;
}
return ret;
@@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
const struct net_bridge_vlan *pvent;
struct net_bridge_vlan_group *vg;
struct net_bridge_port *p;
+ unsigned long *changed;
+ bool vlchange;
u16 old_pvid;
int err = 0;
- unsigned long *changed;
if (!pvid) {
br_vlan_disable_default_pvid(br);
@@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
err = br_vlan_add(br, pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY);
+ BRIDGE_VLAN_INFO_BRENTRY,
+ &vlchange);
if (err)
goto out;
br_vlan_delete(br, old_pvid);
@@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
err = nbp_vlan_add(p, pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED,
+ &vlchange);
if (err)
goto err_port;
nbp_vlan_delete(p, old_pvid);
@@ -890,7 +914,8 @@ err_port:
if (old_pvid)
nbp_vlan_add(p, old_pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED,
+ &vlchange);
nbp_vlan_delete(p, pvid);
}
@@ -899,7 +924,8 @@ err_port:
br_vlan_add(br, old_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY);
+ BRIDGE_VLAN_INFO_BRENTRY,
+ &vlchange);
br_vlan_delete(br, pvid);
}
goto out;
@@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br)
{
struct net_bridge_vlan_group *vg;
int ret = -ENOMEM;
+ bool changed;
vg = kzalloc(sizeof(*vg), GFP_KERNEL);
if (!vg)
@@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br)
rcu_assign_pointer(br->vlgrp, vg);
ret = br_vlan_add(br, 1,
BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY);
+ BRIDGE_VLAN_INFO_BRENTRY, &changed);
if (ret)
goto err_vlan_add;
@@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p)
INIT_LIST_HEAD(&vg->vlan_list);
rcu_assign_pointer(p->vlgrp, vg);
if (p->br->default_pvid) {
+ bool changed;
+
ret = nbp_vlan_add(p, p->br->default_pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED,
+ &changed);
if (ret)
goto err_vlan_add;
}
@@ -1016,8 +1046,10 @@ err_vlan_enabled:
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
+ * changed must be true only if the vlan was created or updated
*/
-int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
+int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
+ bool *changed)
{
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = port->dev,
@@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
ASSERT_RTNL();
+ *changed = false;
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
/* Pass the flags to the hardware bridge */
ret = switchdev_port_obj_add(port->dev, &v.obj);
if (ret && ret != -EOPNOTSUPP)
return ret;
- __vlan_add_flags(vlan, flags);
+ *changed = __vlan_add_flags(vlan, flags);
+
return 0;
}
@@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
ret = __vlan_add(vlan, flags);
if (ret)
kfree(vlan);
+ else
+ *changed = true;
return ret;
}
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index be4d0ce..2f28e16 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the netfilter modules for Link Layer filtering on a bridge.
#
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 2585b10..276b602 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb)
static int __net_init broute_net_init(struct net *net)
{
- net->xt.broute_table = ebt_register_table(net, &broute_table, NULL);
- return PTR_ERR_OR_ZERO(net->xt.broute_table);
+ return ebt_register_table(net, &broute_table, NULL,
+ &net->xt.broute_table);
}
static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 45a00db..c41da5f 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_filter[] = {
static int __net_init frame_filter_net_init(struct net *net)
{
- net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter);
- return PTR_ERR_OR_ZERO(net->xt.frame_filter);
+ return ebt_register_table(net, &frame_filter, ebt_ops_filter,
+ &net->xt.frame_filter);
}
static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 57cd5bb..08df740 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -93,8 +93,8 @@ static const struct nf_hook_ops ebt_ops_nat[] = {
static int __net_init frame_nat_net_init(struct net *net)
{
- net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat);
- return PTR_ERR_OR_ZERO(net->xt.frame_nat);
+ return ebt_register_table(net, &frame_nat, ebt_ops_nat,
+ &net->xt.frame_nat);
}
static void __net_exit frame_nat_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 54c274d..37817d2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1169,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table)
kfree(table);
}
-struct ebt_table *
-ebt_register_table(struct net *net, const struct ebt_table *input_table,
- const struct nf_hook_ops *ops)
+int ebt_register_table(struct net *net, const struct ebt_table *input_table,
+ const struct nf_hook_ops *ops, struct ebt_table **res)
{
struct ebt_table_info *newinfo;
struct ebt_table *t, *table;
@@ -1183,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
repl->entries == NULL || repl->entries_size == 0 ||
repl->counters != NULL || input_table->private != NULL) {
BUGPRINT("Bad table data for ebt_register_table!!!\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
/* Don't add one table to multiple lists. */
@@ -1252,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table,
list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]);
mutex_unlock(&ebt_mutex);
+ WRITE_ONCE(*res, table);
+
if (!ops)
- return table;
+ return 0;
ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
if (ret) {
__ebt_unregister_table(net, table);
- return ERR_PTR(ret);
+ *res = NULL;
}
- return table;
+ return ret;
free_unlock:
mutex_unlock(&ebt_mutex);
free_chainstack:
@@ -1276,7 +1277,7 @@ free_newinfo:
free_table:
kfree(table);
out:
- return ERR_PTR(ret);
+ return ret;
}
void ebt_unregister_table(struct net *net, struct ebt_table *table,
diff --git a/net/caif/Makefile b/net/caif/Makefile
index cc2b511..4f6c051 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG
caif-y := caif_dev.o \
diff --git a/net/can/Makefile b/net/can/Makefile
index 1093675..1242bbb 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux Controller Area Network core.
#
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 88edac0..003b2d6 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -78,7 +78,7 @@ MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)");
static struct kmem_cache *rcv_cache __read_mostly;
/* table of registered CAN protocols */
-static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly;
+static const struct can_proto __rcu *proto_tab[CAN_NPROTO] __read_mostly;
static DEFINE_MUTEX(proto_tab_lock);
static atomic_t skbcounter = ATOMIC_INIT(0);
@@ -788,7 +788,7 @@ int can_proto_register(const struct can_proto *cp)
mutex_lock(&proto_tab_lock);
- if (proto_tab[proto]) {
+ if (rcu_access_pointer(proto_tab[proto])) {
pr_err("can: protocol %d already registered\n", proto);
err = -EBUSY;
} else
@@ -812,7 +812,7 @@ void can_proto_unregister(const struct can_proto *cp)
int proto = cp->protocol;
mutex_lock(&proto_tab_lock);
- BUG_ON(proto_tab[proto] != cp);
+ BUG_ON(rcu_access_pointer(proto_tab[proto]) != cp);
RCU_INIT_POINTER(proto_tab[proto], NULL);
mutex_unlock(&proto_tab_lock);
@@ -875,15 +875,20 @@ static int can_pernet_init(struct net *net)
spin_lock_init(&net->can.can_rcvlists_lock);
net->can.can_rx_alldev_list =
kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL);
-
+ if (!net->can.can_rx_alldev_list)
+ goto out;
net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL);
+ if (!net->can.can_stats)
+ goto out_free_alldev_list;
net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL);
+ if (!net->can.can_pstats)
+ goto out_free_can_stats;
if (IS_ENABLED(CONFIG_PROC_FS)) {
/* the statistics are updated every second (timer triggered) */
if (stats_timer) {
- setup_timer(&net->can.can_stattimer, can_stat_update,
- (unsigned long)net);
+ timer_setup(&net->can.can_stattimer, can_stat_update,
+ 0);
mod_timer(&net->can.can_stattimer,
round_jiffies(jiffies + HZ));
}
@@ -892,6 +897,13 @@ static int can_pernet_init(struct net *net)
}
return 0;
+
+ out_free_can_stats:
+ kfree(net->can.can_stats);
+ out_free_alldev_list:
+ kfree(net->can.can_rx_alldev_list);
+ out:
+ return -ENOMEM;
}
static void can_pernet_exit(struct net *net)
diff --git a/net/can/af_can.h b/net/can/af_can.h
index d0ef45b..eca6463 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -113,6 +113,6 @@ struct s_pstats {
/* function prototypes for the CAN networklayer procfs (proc.c) */
void can_init_proc(struct net *net);
void can_remove_proc(struct net *net);
-void can_stat_update(unsigned long data);
+void can_stat_update(struct timer_list *t);
#endif /* AF_CAN_H */
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 47a8748d..13690334 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1493,13 +1493,14 @@ static int bcm_init(struct sock *sk)
static int bcm_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
+ struct net *net;
struct bcm_sock *bo;
struct bcm_op *op, *next;
- if (sk == NULL)
+ if (!sk)
return 0;
+ net = sock_net(sk);
bo = bcm_sk(sk);
/* remove bcm_ops, timer, rx_unregister(), etc. */
diff --git a/net/can/proc.c b/net/can/proc.c
index 83045f0..d979b3d 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
return rate;
}
-void can_stat_update(unsigned long data)
+void can_stat_update(struct timer_list *t)
{
- struct net *net = (struct net *)data;
+ struct net *net = from_timer(net, t, can.can_stattimer);
struct s_stats *can_stats = net->can.can_stats;
unsigned long j = jiffies; /* snapshot */
@@ -221,7 +221,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
can_stats->total_rx_match_ratio);
@@ -291,7 +291,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v)
user_reset = 1;
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
seq_printf(m, "Scheduled statistic reset #%ld.\n",
can_pstats->stats_reset + 1);
} else {
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 6a51809..b4bded4 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for CEPH filesystem.
#
diff --git a/net/ceph/armor.c b/net/ceph/armor.c
index 1fc1ee1..0db8065 100644
--- a/net/ceph/armor.c
+++ b/net/ceph/armor.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/errno.h>
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 48bb8d9..dbde2b3 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index df45e46..41d2a0c 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 6202153..860ed98 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_AUTH_NONE_H
#define _FS_CEPH_AUTH_NONE_H
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 8757fb8..2f4a1ba 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index 48e9ad4..454cb54 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_AUTH_X_H
#define _FS_CEPH_AUTH_X_H
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 671d305..32c13d7 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __FS_CEPH_AUTH_X_PROTOCOL
#define __FS_CEPH_AUTH_X_PROTOCOL
diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c
index add5f92..5622763 100644
--- a/net/ceph/buffer.c
+++ b/net/ceph/buffer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c
index dcbe67f..756a2dc 100644
--- a/net/ceph/ceph_fs.c
+++ b/net/ceph/ceph_fs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Some non-inline ceph helpers
*/
diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c
index 19b7d8a..10e0149 100644
--- a/net/ceph/ceph_strings.c
+++ b/net/ceph/ceph_strings.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Ceph string constants
*/
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 08ada89..8d2032b 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/types.h>
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 4b428f4..3d70244 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#ifdef __KERNEL__
# include <linux/slab.h>
# include <linux/crush/crush.h>
diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c
index ed123af..e5cc603 100644
--- a/net/ceph/crush/hash.c
+++ b/net/ceph/crush/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#ifdef __KERNEL__
# include <linux/crush/hash.h>
#else
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 46008d5..489610a 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 58d83aa..bb45c7d 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_CRYPTO_H
#define _FS_CEPH_CRYPTO_H
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index fa5233e..1eef680 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/device.h>
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index a67298c..ad93342 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/crc32c.h>
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 63edc6e..9ae1bab 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index aaed59a..7257153 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/err.h>
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e02f01f..2814dba 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 79d14d7..0da27c6 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index ce09f73..2ea0564 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/slab.h>
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a7c9a7..ee43bc1 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/module.h>
diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c
index 22fb96e..3191d9d 100644
--- a/net/ceph/string_table.c
+++ b/net/ceph/string_table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/slab.h>
#include <linux/gfp.h>
#include <linux/string.h>
diff --git a/net/core/Makefile b/net/core/Makefile
index 56d771a..1fd0a9c 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux networking core.
#
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 0b7b4c2..522873e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SUCS NET3:
*
diff --git a/net/core/dev.c b/net/core/dev.c
index 1770097..30b5fe3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
+#include <linux/net_namespace.h>
#include "net-sysfs.h"
@@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info);
static struct napi_struct *napi_by_id(unsigned int napi_id);
@@ -1149,9 +1149,8 @@ static int dev_alloc_name_ns(struct net *net,
return ret;
}
-static int dev_get_valid_name(struct net *net,
- struct net_device *dev,
- const char *name)
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
BUG_ON(!net);
@@ -1167,6 +1166,7 @@ static int dev_get_valid_name(struct net *net,
return 0;
}
+EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
@@ -1295,7 +1295,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
/**
* dev_get_alias - get ifalias of a device
* @dev: device
- * @alias: buffer to store name of ifalias
+ * @name: buffer to store name of ifalias
* @len: size of buffer
*
* get ifalias for a device. Caller must make sure dev cannot go
@@ -1338,10 +1338,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info.dev = dev,
+ };
- change_info.flags_changed = 0;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
@@ -1562,9 +1563,10 @@ EXPORT_SYMBOL(dev_disable_lro);
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- netdev_notifier_info_init(&info, dev);
return nb->notifier_call(nb, val, &info);
}
@@ -1689,11 +1691,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
*/
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info)
{
ASSERT_RTNL();
- netdev_notifier_info_init(info, dev);
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -1708,9 +1708,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- return call_netdevice_notifiers_info(val, dev, &info);
+ return call_netdevice_notifiers_info(val, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -2038,6 +2040,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
return 0;
}
+EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
@@ -3271,22 +3274,22 @@ EXPORT_SYMBOL(dev_loopback_xmit);
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
- if (!cl)
+ if (!miniq)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
*ret = NET_XMIT_DROP;
kfree_skb(skb);
return NULL;
@@ -4186,7 +4189,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
#ifdef CONFIG_NET_CLS_ACT
- struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
struct tcf_result cl_res;
/* If there's at least one ingress present somewhere (so
@@ -4194,8 +4197,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
* that are not configured with an ingress qdisc will bail
* out here.
*/
- if (!cl)
+ if (!miniq)
return skb;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
@@ -4203,15 +4207,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_at_ingress = 1;
- qdisc_bstats_cpu_update(cl->q, skb);
+ mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, cl, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_cpu_drop(cl->q);
+ mini_qdisc_qstats_cpu_drop(miniq);
kfree_skb(skb);
return NULL;
case TC_ACT_STOLEN:
@@ -4489,6 +4493,33 @@ out:
return ret;
}
+/**
+ * netif_receive_skb_core - special purpose version of netif_receive_skb
+ * @skb: buffer to process
+ *
+ * More direct receive version of netif_receive_skb(). It should
+ * only be used by callers that have a need to skip RPS and Generic XDP.
+ * Caller must also take care of handling if (page_is_)pfmemalloc.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb_core(struct sk_buff *skb)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __netif_receive_skb_core(skb, false);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(netif_receive_skb_core);
+
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
@@ -4514,7 +4545,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
return ret;
}
-static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
+static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
{
struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
struct bpf_prog *new = xdp->prog;
@@ -6275,9 +6306,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *upper_priv, void *upper_info)
-{
- struct netdev_notifier_changeupper_info changeupper_info;
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .upper_dev = upper_dev,
+ .master = master,
+ .linking = true,
+ .upper_info = upper_info,
+ };
int ret = 0;
ASSERT_RTNL();
@@ -6295,12 +6336,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- changeupper_info.upper_dev = upper_dev;
- changeupper_info.master = master;
- changeupper_info.linking = true;
- changeupper_info.upper_info = upper_info;
-
- ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6311,7 +6347,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6336,9 +6372,11 @@ rollback:
* returns zero.
*/
int netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false,
+ NULL, NULL, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -6357,10 +6395,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
*/
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info)
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
{
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info);
+ upper_priv, upper_info, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
@@ -6375,20 +6414,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_notifier_changeupper_info changeupper_info;
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ },
+ .upper_dev = upper_dev,
+ .linking = false,
+ };
ASSERT_RTNL();
- changeupper_info.upper_dev = upper_dev;
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
- changeupper_info.linking = false;
- call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6404,11 +6447,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info info;
+ struct netdev_notifier_bonding_info info = {
+ .info.dev = dev,
+ };
memcpy(&info.bonding_info, bonding_info,
sizeof(struct netdev_bonding_info));
- call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
&info.info);
}
EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6534,11 +6579,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info)
{
- struct netdev_notifier_changelowerstate_info changelowerstate_info;
+ struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+ .info.dev = lower_dev,
+ };
ASSERT_RTNL();
changelowerstate_info.lower_state_info = lower_state_info;
- call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
&changelowerstate_info.info);
}
EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6829,11 +6876,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
if (dev->flags & IFF_UP &&
(changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info = {
+ .dev = dev,
+ },
+ .flags_changed = changes,
+ };
- change_info.flags_changed = changes;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
- &change_info.info);
+ call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
}
}
@@ -7040,26 +7090,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
}
EXPORT_SYMBOL(dev_change_proto_down);
-u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id)
+u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
xdp.command = XDP_QUERY_PROG;
/* Query must always succeed. */
- WARN_ON(xdp_op(dev, &xdp) < 0);
+ WARN_ON(bpf_op(dev, &xdp) < 0);
if (prog_id)
*prog_id = xdp.prog_id;
return xdp.prog_attached;
}
-static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
+static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
- struct netdev_xdp xdp;
+ struct netdev_bpf xdp;
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@@ -7070,7 +7120,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op,
xdp.flags = flags;
xdp.prog = prog;
- return xdp_op(dev, &xdp);
+ return bpf_op(dev, &xdp);
}
/**
@@ -7087,32 +7137,36 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
{
const struct net_device_ops *ops = dev->netdev_ops;
struct bpf_prog *prog = NULL;
- xdp_op_t xdp_op, xdp_chk;
+ bpf_op_t bpf_op, bpf_chk;
int err;
ASSERT_RTNL();
- xdp_op = xdp_chk = ops->ndo_xdp;
- if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
+ bpf_op = bpf_chk = ops->ndo_bpf;
+ if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE)))
return -EOPNOTSUPP;
- if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE))
- xdp_op = generic_xdp_install;
- if (xdp_op == xdp_chk)
- xdp_chk = generic_xdp_install;
+ if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE))
+ bpf_op = generic_xdp_install;
+ if (bpf_op == bpf_chk)
+ bpf_chk = generic_xdp_install;
if (fd >= 0) {
- if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL))
+ if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
return -EEXIST;
if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- __dev_xdp_attached(dev, xdp_op, NULL))
+ __dev_xdp_attached(dev, bpf_op, NULL))
return -EBUSY;
- prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (bpf_op == ops->ndo_bpf)
+ prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
+ dev);
+ else
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
- err = dev_xdp_install(dev, xdp_op, extack, flags, prog);
+ err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
if (err < 0 && prog)
bpf_prog_put(prog);
@@ -7204,7 +7258,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
/*
* Flush the unicast and multicast chains
@@ -8291,7 +8345,7 @@ EXPORT_SYMBOL(unregister_netdev);
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err;
+ int err, new_nsid;
ASSERT_RTNL();
@@ -8347,7 +8401,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ else
+ new_nsid = peernet2id(dev_net(dev), net);
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
/*
* Flush the unicast and multicast chains
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 709a4e6..7e690d0 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kmod.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -303,7 +304,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
- dev->tx_queue_len = ifr->ifr_qlen;
+ if (dev->tx_queue_len ^ ifr->ifr_qlen) {
+ unsigned int orig_len = dev->tx_queue_len;
+
+ dev->tx_queue_len = ifr->ifr_qlen;
+ err = call_netdevice_notifiers(
+ NETDEV_CHANGE_TX_QUEUE_LEN, dev);
+ err = notifier_to_errno(err);
+ if (err) {
+ dev->tx_queue_len = orig_len;
+ return err;
+ }
+ }
return 0;
case SIOCSIFNAME:
diff --git a/net/core/dst.c b/net/core/dst.c
index a6c47da..662a2d4 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -322,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
+
+void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
+{
+#ifdef CONFIG_DST_CACHE
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+
+ if (one_md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
+ }
+#endif
+ free_percpu(md_dst);
+}
+EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3228411..f8fcf45 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -403,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
+/* Given two link masks, AND them together and save the result in dst. */
+void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
+ struct ethtool_link_ksettings *src)
+{
+ unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+ unsigned int idx = 0;
+
+ for (; idx < size; idx++) {
+ dst->link_modes.supported[idx] &=
+ src->link_modes.supported[idx];
+ dst->link_modes.advertising[idx] &=
+ src->link_modes.advertising[idx];
+ }
+}
+EXPORT_SYMBOL(ethtool_intersect_link_masks);
+
void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
u32 legacy_u32)
{
@@ -436,7 +452,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
/* return false if legacy contained non-0 deprecated fields
- * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated
+ * maxtxpkt/maxrxpkt. rest of ksettings always updated
*/
static bool
convert_legacy_settings_to_link_ksettings(
@@ -451,8 +467,7 @@ convert_legacy_settings_to_link_ksettings(
* deprecated legacy fields, and they should not use
* %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
*/
- if (legacy_settings->transceiver ||
- legacy_settings->maxtxpkt ||
+ if (legacy_settings->maxtxpkt ||
legacy_settings->maxrxpkt)
retval = false;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9a6d97c..fafd0a4 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -314,10 +314,12 @@ static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net,
static int call_fib_rule_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib_rule *rule,
- struct fib_rules_ops *ops)
+ struct fib_rules_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct fib_rule_notifier_info info = {
.info.family = ops->family,
+ .info.extack = extack,
.rule = rule,
};
@@ -609,7 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -749,7 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops);
+ call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops,
+ NULL);
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
NETLINK_CB(skb).portid);
fib_rule_put(rule);
diff --git a/net/core/filter.c b/net/core/filter.c
index 9b6e7e8..1afa179 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -43,6 +43,7 @@
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
+#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
@@ -989,10 +990,14 @@ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
- bool ret = __sk_filter_charge(sk, fp);
- if (ret)
- refcount_inc(&fp->refcnt);
- return ret;
+ if (!refcount_inc_not_zero(&fp->refcnt))
+ return false;
+
+ if (!__sk_filter_charge(sk, fp)) {
+ sk_filter_release(fp);
+ return false;
+ }
+ return true;
}
static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
@@ -1835,31 +1840,32 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags)
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+ struct bpf_map *, map, u32, key, u64, flags)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+ /* If user passes invalid input drop the packet. */
if (unlikely(flags))
- return SK_ABORTED;
+ return SK_DROP;
- ri->ifindex = key;
- ri->flags = flags;
- ri->map = map;
+ tcb->bpf.key = key;
+ tcb->bpf.flags = flags;
+ tcb->bpf.map = map;
- return SK_REDIRECT;
+ return SK_PASS;
}
-struct sock *do_sk_redirect_map(void)
+struct sock *do_sk_redirect_map(struct sk_buff *skb)
{
- struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk = NULL;
- if (ri->map) {
- sk = __sock_map_lookup_elem(ri->map, ri->ifindex);
+ if (tcb->bpf.map) {
+ sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
- ri->ifindex = 0;
- ri->map = NULL;
- /* we do not clear flags for future lookup */
+ tcb->bpf.key = 0;
+ tcb->bpf.map = NULL;
}
return sk;
@@ -1869,9 +1875,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_ANYTHING,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
};
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
@@ -2521,10 +2528,36 @@ static int __bpf_tx_xdp(struct net_device *dev,
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
if (err)
return err;
- if (map)
+ dev->netdev_ops->ndo_xdp_flush(dev);
+ return 0;
+}
+
+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
+ struct bpf_map *map,
+ struct xdp_buff *xdp,
+ u32 index)
+{
+ int err;
+
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ struct net_device *dev = fwd;
+
+ if (!dev->netdev_ops->ndo_xdp_xmit)
+ return -EOPNOTSUPP;
+
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ if (err)
+ return err;
__dev_map_insert_ctx(map, index);
- else
- dev->netdev_ops->ndo_xdp_flush(dev);
+
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+ struct bpf_cpu_map_entry *rcpu = fwd;
+
+ err = cpu_map_enqueue(rcpu, xdp, dev_rx);
+ if (err)
+ return err;
+ __cpu_map_insert_ctx(map, index);
+ }
return 0;
}
@@ -2534,11 +2567,33 @@ void xdp_do_flush_map(void)
struct bpf_map *map = ri->map_to_flush;
ri->map_to_flush = NULL;
- if (map)
- __dev_map_flush(map);
+ if (map) {
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ __dev_map_flush(map);
+ break;
+ case BPF_MAP_TYPE_CPUMAP:
+ __cpu_map_flush(map);
+ break;
+ default:
+ break;
+ }
+ }
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+{
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ return __dev_map_lookup_elem(map, index);
+ case BPF_MAP_TYPE_CPUMAP:
+ return __cpu_map_lookup_elem(map, index);
+ default:
+ return NULL;
+ }
+}
+
static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
unsigned long aux)
{
@@ -2551,8 +2606,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
- struct net_device *fwd = NULL;
u32 index = ri->ifindex;
+ void *fwd = NULL;
int err;
ri->ifindex = 0;
@@ -2565,7 +2620,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
goto err;
}
- fwd = __dev_map_lookup_elem(map, index);
+ fwd = __xdp_map_lookup_elem(map, index);
if (!fwd) {
err = -EINVAL;
goto err;
@@ -2573,7 +2628,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
if (ri->map_to_flush && ri->map_to_flush != map)
xdp_do_flush_map();
- err = __bpf_tx_xdp(fwd, map, xdp, index);
+ err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
if (unlikely(err))
goto err;
@@ -2615,54 +2670,88 @@ err:
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
-int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
+{
+ unsigned int len;
+
+ if (unlikely(!(fwd->flags & IFF_UP)))
+ return -ENETDOWN;
+
+ len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+ if (skb->len > len)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
struct net_device *fwd = NULL;
u32 index = ri->ifindex;
- unsigned int len;
int err = 0;
ri->ifindex = 0;
ri->map = NULL;
ri->map_owner = 0;
- if (map) {
- if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
- err = -EFAULT;
- map = NULL;
- goto err;
- }
- fwd = __dev_map_lookup_elem(map, index);
- } else {
- fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+ err = -EFAULT;
+ map = NULL;
+ goto err;
}
+ fwd = __xdp_map_lookup_elem(map, index);
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
}
- if (unlikely(!(fwd->flags & IFF_UP))) {
- err = -ENETDOWN;
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+ skb->dev = fwd;
+ } else {
+ /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
+ err = -EBADRQC;
goto err;
}
- len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
- if (skb->len > len) {
- err = -EMSGSIZE;
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+ return err;
+}
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ u32 index = ri->ifindex;
+ struct net_device *fwd;
+ int err = 0;
+
+ if (ri->map)
+ return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
+
+ ri->ifindex = 0;
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
goto err;
}
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+
skb->dev = fwd;
- map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index)
- : _trace_xdp_redirect(dev, xdp_prog, index);
+ _trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
- map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err)
- : _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
@@ -2983,14 +3072,15 @@ static const struct bpf_func_proto *
bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
- /* Race is not possible, since it's called from verifier
- * that is holding verifier mutex.
- */
- md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
- METADATA_IP_TUNNEL,
- GFP_KERNEL);
- if (!md_dst)
+ struct metadata_dst __percpu *tmp;
+
+ tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
+ METADATA_IP_TUNNEL,
+ GFP_KERNEL);
+ if (!tmp)
return NULL;
+ if (cmpxchg(&md_dst, NULL, tmp))
+ metadata_dst_free_percpu(tmp);
}
switch (which) {
@@ -3185,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
static const struct bpf_func_proto bpf_setsockopt_proto = {
.func = bpf_setsockopt,
- .gpl_only = true,
+ .gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
@@ -3194,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+ int, level, int, optname, char *, optval, int, optlen)
+{
+ struct sock *sk = bpf_sock->sk;
+
+ if (!sk_fullsock(sk))
+ goto err_clear;
+
+#ifdef CONFIG_INET
+ if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
+ if (optname == TCP_CONGESTION) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (!icsk->icsk_ca_ops || optlen <= 1)
+ goto err_clear;
+ strncpy(optval, icsk->icsk_ca_ops->name, optlen);
+ optval[optlen - 1] = 0;
+ } else {
+ goto err_clear;
+ }
+ } else {
+ goto err_clear;
+ }
+ return 0;
+#endif
+err_clear:
+ memset(optval, 0, optlen);
+ return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_getsockopt_proto = {
+ .func = bpf_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3372,6 +3503,8 @@ static const struct bpf_func_proto *
switch (func_id) {
case BPF_FUNC_setsockopt:
return &bpf_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_getsockopt_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -3736,7 +3869,6 @@ static bool sk_skb_is_valid_access(int off, int size,
if (type == BPF_WRITE) {
switch (off) {
- case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_index):
case bpf_ctx_range(struct __sk_buff, priority):
break;
@@ -3746,6 +3878,8 @@ static bool sk_skb_is_valid_access(int off, int size,
}
switch (off) {
+ case bpf_ctx_range(struct __sk_buff, mark):
+ return false;
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
@@ -4307,68 +4441,120 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-const struct bpf_verifier_ops sk_filter_prog_ops = {
+static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog, u32 *target_size)
+{
+ struct bpf_insn *insn = insn_buf;
+ int off;
+
+ switch (si->off) {
+ case offsetof(struct __sk_buff, data_end):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_end);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct tcp_skb_cb, bpf.data_end);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+ default:
+ return bpf_convert_ctx_access(type, si, insn_buf, prog,
+ target_size);
+ }
+
+ return insn - insn_buf;
+}
+
+const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-const struct bpf_verifier_ops tc_cls_act_prog_ops = {
+const struct bpf_prog_ops sk_filter_prog_ops = {
+};
+
+const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops tc_cls_act_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops xdp_prog_ops = {
+const struct bpf_verifier_ops xdp_verifier_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+};
+
+const struct bpf_prog_ops xdp_prog_ops = {
.test_run = bpf_prog_test_run_xdp,
};
-const struct bpf_verifier_ops cg_skb_prog_ops = {
+const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_skb_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_inout_prog_ops = {
+const struct bpf_verifier_ops lwt_inout_verifier_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_inout_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_xmit_prog_ops = {
+const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops lwt_xmit_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops cg_sock_prog_ops = {
+const struct bpf_verifier_ops cg_sock_verifier_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-const struct bpf_verifier_ops sock_ops_prog_ops = {
+const struct bpf_prog_ops cg_sock_prog_ops = {
+};
+
+const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
.convert_ctx_access = sock_ops_convert_ctx_access,
};
-const struct bpf_verifier_ops sk_skb_prog_ops = {
+const struct bpf_prog_ops sock_ops_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_skb_verifier_ops = {
.get_func_proto = sk_skb_func_proto,
.is_valid_access = sk_skb_is_valid_access,
- .convert_ctx_access = bpf_convert_ctx_access,
+ .convert_ctx_access = sk_skb_convert_ctx_access,
.gen_prologue = sk_skb_prologue,
};
+const struct bpf_prog_ops sk_skb_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 814e58a..4b54e5f 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 4847964..615ccab 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 2745a1b..006876c 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_SYSFS_H__
#define __NET_SYSFS_H__
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 1132820..38093458 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* consolidates trace point definitions
*
@@ -31,6 +32,7 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/tcp.h>
#include <trace/events/fib.h>
#include <trace/events/qdisc.h>
#if IS_ENABLED(CONFIG_IPV6)
@@ -48,3 +50,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6cfdc7c..b797832 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
rtnl_net_notifyid(net, RTM_NEWNSID, id);
return id;
}
+EXPORT_SYMBOL_GPL(peernet2id_alloc);
/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6e1e10f..e3fa53a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
+ pkt_dev->pkt_overhead;
}
- for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ for (i = 0; i < sizeof(struct in6_addr); i++)
if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
set = 1;
break;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3961f87..dabba2a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -453,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
const struct rtnl_af_ops *ops;
- list_for_each_entry(ops, &rtnl_af_ops, list) {
+ list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
if (ops->family == family)
return ops;
}
@@ -470,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
void rtnl_af_register(struct rtnl_af_ops *ops)
{
rtnl_lock();
- list_add_tail(&ops->list, &rtnl_af_ops);
+ list_add_tail_rcu(&ops->list, &rtnl_af_ops);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
/**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
rtnl_lock();
- __rtnl_af_unregister(ops);
+ list_del_rcu(&ops->list);
rtnl_unlock();
+
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -508,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
/* IFLA_AF_SPEC */
size = nla_total_size(sizeof(struct nlattr));
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_link_af_size) {
/* AF_* + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
af_ops->get_link_af_size(dev, ext_filter_mask);
}
}
+ rcu_read_unlock();
return size;
}
@@ -927,8 +919,10 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
- + nla_total_size(1); /* IFLA_PROTO_DOWN */
-
+ + nla_total_size(4) /* IFLA_NEW_NETNSID */
+ + nla_total_size(1) /* IFLA_PROTO_DOWN */
+ + nla_total_size(4) /* IFLA_IF_NETNSID */
+ + 0;
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1276,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id)
*prog_id = generic_xdp_prog->aux->id;
return XDP_ATTACHED_SKB;
}
- if (!ops->ndo_xdp)
+ if (!ops->ndo_bpf)
return XDP_ATTACHED_NONE;
- return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id);
+ return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id);
}
static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1377,13 +1371,14 @@ static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
}
static int rtnl_fill_link_netnsid(struct sk_buff *skb,
- const struct net_device *dev)
+ const struct net_device *dev,
+ struct net *src_net)
{
if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id_alloc(dev_net(dev), link_net);
+ int id = peernet2id_alloc(src_net, link_net);
if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
return -EMSGSIZE;
@@ -1393,15 +1388,55 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb,
return 0;
}
-static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+static int rtnl_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ const struct rtnl_af_ops *af_ops;
+ struct nlattr *af_spec;
+
+ af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!af_spec)
+ return -EMSGSIZE;
+
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
+ struct nlattr *af;
+ int err;
+
+ if (!af_ops->fill_link_af)
+ continue;
+
+ af = nla_nest_start(skb, af_ops->family);
+ if (!af)
+ return -EMSGSIZE;
+
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
+ /*
+ * Caller may return ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, af);
+ }
+
+ nla_nest_end(skb, af_spec);
+ return 0;
+}
+
+static int rtnl_fill_ifinfo(struct sk_buff *skb,
+ struct net_device *dev, struct net *src_net,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
- u32 event)
+ u32 event, int *new_nsid, int tgt_netnsid)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct nlattr *af_spec;
- struct rtnl_af_ops *af_ops;
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1416,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change;
+ if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid))
+ goto nla_put_failure;
+
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) ||
nla_put_u8(skb, IFLA_OPERSTATE,
@@ -1481,42 +1519,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
- if (rtnl_fill_link_netnsid(skb, dev))
+ if (rtnl_fill_link_netnsid(skb, dev, src_net))
goto nla_put_failure;
- if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ if (new_nsid &&
+ nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
- if (af_ops->fill_link_af) {
- struct nlattr *af;
- int err;
-
- if (!(af = nla_nest_start(skb, af_ops->family)))
- goto nla_put_failure;
-
- err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
-
- /*
- * Caller may return ENODATA to indicate that there
- * was no data to be dumped. This is not an error, it
- * means we should trim the attribute header and
- * continue.
- */
- if (err == -ENODATA)
- nla_nest_cancel(skb, af);
- else if (err < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, af);
- }
- }
-
- nla_nest_end(skb, af_spec);
+ rcu_read_lock();
+ if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
nlmsg_end(skb, nlh);
return 0;
+nla_put_failure_rcu:
+ rcu_read_unlock();
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -1538,7 +1557,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
[IFLA_NET_NS_FD] = { .type = NLA_U32 },
- [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
+ /* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
+ * allow 0-length string (needed to remove an alias).
+ */
+ [IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
[IFLA_PORT_SELF] = { .type = NLA_NESTED },
@@ -1555,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_XDP] = { .type = NLA_NESTED },
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
+ [IFLA_IF_NETNSID] = { .type = NLA_S32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1658,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev,
return false;
}
+static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+{
+ struct net *net;
+
+ net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+ if (!net)
+ return ERR_PTR(-EINVAL);
+
+ /* For now, the caller is required to have CAP_NET_ADMIN in
+ * the user namespace owning the target net ns.
+ */
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EACCES);
+ }
+ return net;
+}
+
static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
int h, s_h;
int idx = 0, s_idx;
struct net_device *dev;
@@ -1670,6 +1712,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
const struct rtnl_link_ops *kind_ops = NULL;
unsigned int flags = NLM_F_MULTI;
int master_idx = 0;
+ int netnsid = -1;
int err;
int hdrlen;
@@ -1688,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX,
ifla_policy, NULL) >= 0) {
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(skb, netnsid);
+ if (IS_ERR(tgt_net)) {
+ tgt_net = net;
+ netnsid = -1;
+ }
+ }
+
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1703,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
- head = &net->dev_index_head[h];
+ head = &tgt_net->dev_index_head[h];
hlist_for_each_entry(dev, head, index_hlist) {
if (link_dump_filtered(dev, master_idx, kind_ops))
goto cont;
if (idx < s_idx)
goto cont;
- err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
+ err = rtnl_fill_ifinfo(skb, dev, net,
+ RTM_NEWLINK,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask, 0);
+ ext_filter_mask, 0, NULL,
+ netnsid);
if (err < 0) {
if (likely(skb->len))
@@ -1732,6 +1786,8 @@ out_err:
cb->args[0] = h;
cb->seq = net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -1778,17 +1834,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ rcu_read_lock();
+ af_ops = rtnl_af_lookup(nla_type(af));
+ if (!af_ops) {
+ rcu_read_unlock();
return -EAFNOSUPPORT;
+ }
- if (!af_ops->set_link_af)
+ if (!af_ops->set_link_af) {
+ rcu_read_unlock();
return -EOPNOTSUPP;
+ }
if (af_ops->validate_link_af) {
err = af_ops->validate_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
return err;
+ }
}
+
+ rcu_read_unlock();
}
}
@@ -1964,7 +2030,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
{
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops;
@@ -1989,7 +2056,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
return -EINVAL;
ops = upper_dev->netdev_ops;
if (ops->ndo_add_slave) {
- err = ops->ndo_add_slave(upper_dev, dev);
+ err = ops->ndo_add_slave(upper_dev, dev, extack);
if (err)
return err;
} else {
@@ -2122,7 +2189,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
@@ -2148,7 +2215,7 @@ static int do_setlink(const struct sk_buff *skb,
dev->tx_queue_len = orig_len;
goto errout;
}
- status |= DO_SETLINK_NOTIFY;
+ status |= DO_SETLINK_MODIFIED;
}
}
@@ -2245,13 +2312,17 @@ static int do_setlink(const struct sk_buff *skb,
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
- BUG();
+ rcu_read_lock();
+
+ BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
err = af_ops->set_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
goto errout;
+ }
+ rcu_read_unlock();
status |= DO_SETLINK_NOTIFY;
}
}
@@ -2303,7 +2374,7 @@ static int do_setlink(const struct sk_buff *skb,
errout:
if (status & DO_SETLINK_MODIFIED) {
- if (status & DO_SETLINK_NOTIFY)
+ if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
netdev_state_change(dev);
if (err < 0)
@@ -2329,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2423,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
@@ -2554,6 +2631,9 @@ replay:
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID])
+ return -EOPNOTSUPP;
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -2631,12 +2711,6 @@ replay:
return err;
slave_data = slave_attr;
}
- if (m_ops->slave_validate) {
- err = m_ops->slave_validate(tb, slave_data,
- extack);
- if (err < 0)
- return err;
- }
}
if (dev) {
@@ -2766,7 +2840,8 @@ replay:
goto out_unregister;
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+ extack);
if (err)
goto out_unregister;
}
@@ -2792,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
+ struct net *tgt_net = net;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
struct net_device *dev = NULL;
struct sk_buff *nskb;
+ int netnsid = -1;
int err;
u32 ext_filter_mask = 0;
@@ -2804,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ if (tb[IFLA_IF_NETNSID]) {
+ netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
+ tgt_net = get_target_net(skb, netnsid);
+ if (IS_ERR(tgt_net))
+ return PTR_ERR(tgt_net);
+ }
+
if (tb[IFLA_IFNAME])
nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
if (tb[IFLA_EXT_MASK])
ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
+ err = -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifi_index > 0)
- dev = __dev_get_by_index(net, ifm->ifi_index);
+ dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
else if (tb[IFLA_IFNAME])
- dev = __dev_get_by_name(net, ifname);
+ dev = __dev_get_by_name(tgt_net, ifname);
else
- return -EINVAL;
+ goto out;
+ err = -ENODEV;
if (dev == NULL)
- return -ENODEV;
+ goto out;
+ err = -ENOBUFS;
nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
if (nskb == NULL)
- return -ENOBUFS;
+ goto out;
- err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+ err = rtnl_fill_ifinfo(nskb, dev, net,
+ RTM_NEWLINK, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask,
+ 0, NULL, netnsid);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else
err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+out:
+ if (netnsid >= 0)
+ put_net(tgt_net);
return err;
}
@@ -2911,7 +3003,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
- u32 event, gfp_t flags)
+ u32 event, gfp_t flags, int *new_nsid)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2922,7 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
if (skb == NULL)
goto errout;
- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+ err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
+ type, 0, 0, change, 0, 0, event,
+ new_nsid, -1);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2945,14 +3039,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
- gfp_t flags)
+ gfp_t flags, int *new_nsid)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
@@ -2960,9 +3054,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
- rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
+}
+
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid)
+{
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ new_nsid);
}
-EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
@@ -3069,21 +3169,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
-static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
+ struct netlink_ext_ack *extack)
{
u16 vid = 0;
if (vlan_attr) {
if (nla_len(vlan_attr) != sizeof(u16)) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+ NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
return -EINVAL;
}
vid = nla_get_u16(vlan_attr);
if (!vid || vid >= VLAN_VID_MASK) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
- vid);
+ NL_SET_ERR_MSG(extack, "invalid vlan id");
return -EINVAL;
}
}
@@ -3108,24 +3208,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3212,24 +3312,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3669,7 +3769,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3744,7 +3844,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3909,6 +4009,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
return -EMSGSIZE;
ifsm = nlmsg_data(nlh);
+ ifsm->family = PF_UNSPEC;
+ ifsm->pad1 = 0;
+ ifsm->pad2 = 0;
ifsm->ifindex = dev->ifindex;
ifsm->filter_mask = filter_mask;
@@ -3992,25 +4095,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (!attr)
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
int err;
af = nla_nest_start(skb, af_ops->family);
- if (!af)
+ if (!af) {
+ rcu_read_unlock();
goto nla_put_failure;
-
+ }
err = af_ops->fill_stats_af(skb, dev);
- if (err == -ENODATA)
+ if (err == -ENODATA) {
nla_nest_cancel(skb, af);
- else if (err < 0)
+ } else if (err < 0) {
+ rcu_read_unlock();
goto nla_put_failure;
+ }
nla_nest_end(skb, af);
}
}
+ rcu_read_unlock();
nla_nest_end(skb, attr);
@@ -4079,7 +4187,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
/* for IFLA_STATS_AF_SPEC */
size += nla_total_size(0);
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_stats_af_size) {
size += nla_total_size(
af_ops->get_stats_af_size(dev));
@@ -4088,6 +4197,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
size += nla_total_size(0);
}
}
+ rcu_read_unlock();
}
return size;
@@ -4331,15 +4441,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
switch (event) {
case NETDEV_REBOOT:
+ case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
case NETDEV_CHANGENAME:
case NETDEV_FEAT_CHANGE:
case NETDEV_BONDING_FAILOVER:
+ case NETDEV_POST_TYPE_CHANGE:
case NETDEV_NOTIFY_PEERS:
+ case NETDEV_CHANGEUPPER:
case NETDEV_RESEND_IGMP:
case NETDEV_CHANGEINFODATA:
+ case NETDEV_CHANGELOWERSTATE:
+ case NETDEV_CHANGE_TX_QUEUE_LEN:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
break;
default:
break;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d98c2e3..97e604d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1124,9 +1124,13 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) {
+ struct sock *save_sk = skb->sk;
+
/* Streams do not free skb on error. Reset to prev state. */
msg->msg_iter = orig_iter;
+ skb->sk = sk;
___pskb_trim(skb, orig_len);
+ skb->sk = save_sk;
return err;
}
@@ -1350,8 +1354,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
/* Set the tail pointer and length */
skb_put(n, skb->len);
- if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
copy_skb_header(n, skb);
return n;
@@ -1449,8 +1452,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
BUG_ON(nhead < 0);
- if (skb_shared(skb))
- BUG();
+ BUG_ON(skb_shared(skb));
size = SKB_DATA_ALIGN(size);
@@ -1595,9 +1597,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
head_copy_off = newheadroom - head_copy_len;
/* Copy the linear header and data. */
- if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
- skb->len + head_copy_len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+ skb->len + head_copy_len));
copy_skb_header(n, skb);
@@ -1878,8 +1879,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
return NULL;
}
- if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
- BUG();
+ BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
+ skb_tail_pointer(skb), delta));
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
@@ -1898,7 +1899,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
}
/* If we need update frag list, we are in troubles.
- * Certainly, it possible to add an offset to skb data,
+ * Certainly, it is possible to add an offset to skb data,
* but taking into account that pulling is expected to
* be very rare operation, it is worth to fight against
* further bloating skb head and crucify ourselves here instead.
@@ -4767,6 +4768,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize)
{
+ struct skb_shared_info *to_shinfo, *from_shinfo;
int i, delta, len = from->len;
*fragstolen = false;
@@ -4781,7 +4783,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return true;
}
- if (skb_has_frag_list(to) || skb_has_frag_list(from))
+ to_shinfo = skb_shinfo(to);
+ from_shinfo = skb_shinfo(from);
+ if (to_shinfo->frag_list || from_shinfo->frag_list)
return false;
if (skb_zcopy(to) || skb_zcopy(from))
return false;
@@ -4790,8 +4794,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
struct page *page;
unsigned int offset;
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags >= MAX_SKB_FRAGS)
return false;
if (skb_head_is_locked(from))
@@ -4802,12 +4806,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+ skb_fill_page_desc(to, to_shinfo->nr_frags,
page, offset, skb_headlen(from));
*fragstolen = true;
} else {
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags > MAX_SKB_FRAGS)
return false;
delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4815,19 +4819,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
WARN_ON_ONCE(delta < len);
- memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
- skb_shinfo(from)->frags,
- skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
- skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+ memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+ from_shinfo->frags,
+ from_shinfo->nr_frags * sizeof(skb_frag_t));
+ to_shinfo->nr_frags += from_shinfo->nr_frags;
if (!skb_cloned(from))
- skb_shinfo(from)->nr_frags = 0;
+ from_shinfo->nr_frags = 0;
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
- skb_frag_ref(from, i);
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
to->truesize += delta;
to->len += len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 9b7b6bb..7594000 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1654,6 +1654,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sock_copy(newsk, sk);
+ newsk->sk_prot_creator = sk->sk_prot;
+
/* SANITY */
if (likely(newsk->sk_net_refcnt))
get_net(sock_net(newsk));
@@ -1675,20 +1677,28 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_dst_pending_confirm = 0;
newsk->sk_wmem_queued = 0;
newsk->sk_forward_alloc = 0;
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
atomic_set(&newsk->sk_drops, 0);
newsk->sk_send_head = NULL;
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
- filter = rcu_dereference_protected(newsk->sk_filter, 1);
+ rcu_read_lock();
+ filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
/* though it's an empty new sock, the charging may fail
* if sysctl_optmem_max was changed between creation of
* original socket and cloning
*/
is_charged = sk_filter_charge(newsk, filter);
+ RCU_INIT_POINTER(newsk->sk_filter, filter);
+ rcu_read_unlock();
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
/* We need to make sure that we don't uncharge the new
@@ -1709,9 +1719,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0);
- mem_cgroup_sk_alloc(newsk);
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2678,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk_init_common(sk);
sk->sk_send_head = NULL;
- init_timer(&sk->sk_timer);
+ timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index eed1ebf..5eeb1d2 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* To speed up listener socket lookup, create an array to store all sockets
* listening on the same port. This allows a decision to be made after finding
@@ -36,9 +37,14 @@ int reuseport_alloc(struct sock *sk)
* soft irq of receive path or setsockopt from process context
*/
spin_lock_bh(&reuseport_lock);
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "multiple allocations for the same socket");
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -49,6 +55,7 @@ int reuseport_alloc(struct sock *sk)
reuse->num_socks = 1;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+out:
spin_unlock_bh(&reuseport_lock);
return 0;
diff --git a/net/core/stream.c b/net/core/stream.c
index 20231db..1cff9c6 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SUCS NET3:
*
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7cd9aa..cbc3dde 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
* sysctl_net_core.c: sysctl interface to net core subsystem.
*
diff --git a/net/core/tso.c b/net/core/tso.c
index 5dca7ce..43f4eba 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/ip.h>
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 5c8362b..2e7b560 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e1295d5..1c75cd1 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -126,10 +126,10 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
DCCPF_SEQ_WMAX));
}
-static void ccid2_hc_tx_rto_expire(unsigned long data)
+static void ccid2_hc_tx_rto_expire(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
+ struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
+ struct sock *sk = hc->sk;
const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
bh_lock_sock(sk);
@@ -733,8 +733,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_rpdupack = -1;
hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
hc->tx_cwnd_used = 0;
- setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
- (unsigned long)sk);
+ hc->sk = sk;
+ timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
INIT_LIST_HEAD(&hc->tx_av_chunks);
return 0;
}
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 6e50ef2..1af0116 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -85,6 +85,7 @@ struct ccid2_hc_tx_sock {
tx_rto;
u64 tx_rtt_seq:48;
struct timer_list tx_rtotimer;
+ struct sock *sk;
/* Congestion Window validation (optional, RFC 2861) */
u32 tx_cwnd_used,
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 119c043..8b5ba6d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -195,10 +195,10 @@ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc,
}
}
-static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
+static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
+ struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer);
+ struct sock *sk = hc->sk;
unsigned long t_nfb = USEC_PER_SEC / 5;
bh_lock_sock(sk);
@@ -505,8 +505,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
hc->tx_state = TFRC_SSTATE_NO_SENT;
hc->tx_hist = NULL;
- setup_timer(&hc->tx_no_feedback_timer,
- ccid3_hc_tx_no_feedback_timer, (unsigned long)sk);
+ hc->sk = sk;
+ timer_setup(&hc->tx_no_feedback_timer,
+ ccid3_hc_tx_no_feedback_timer, 0);
return 0;
}
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 1a9933c..813d91c 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -106,6 +106,7 @@ struct ccid3_hc_tx_sock {
u8 tx_last_win_count;
ktime_t tx_t_last_win_count;
struct timer_list tx_no_feedback_timer;
+ struct sock *sk;
ktime_t tx_t_ld;
ktime_t tx_t_nom;
struct tfrc_tx_hist_entry *tx_hist;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 08df7a3..876e185 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -149,10 +149,8 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
const u8 idx_a = tfrc_rx_hist_index(h, a),
idx_b = tfrc_rx_hist_index(h, b);
- struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
- h->ring[idx_a] = h->ring[idx_b];
- h->ring[idx_b] = tmp;
+ swap(h->ring[idx_a], h->ring[idx_b]);
}
/*
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index 62b5828..d7f265e 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* TFRC library initialisation
*
diff --git a/net/dccp/input.c b/net/dccp/input.c
index fa6be97..d28d46b 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
case DCCP_PKT_DATA:
if (sk->sk_state == DCCP_RESPOND)
break;
+ /* fall through */
case DCCP_PKT_DATAACK:
case DCCP_PKT_ACK:
/*
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 001c086..e65fcb4 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newinet->inet_saddr = ireq->ir_loc_addr;
- newinet->inet_opt = ireq->opt;
- ireq->opt = NULL;
+ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->inet_id = jiffies;
@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-
+ if (*own_req)
+ ireq->ireq_opt = NULL;
+ else
+ newinet->inet_opt = NULL;
return newsk;
exit_overflow:
@@ -441,6 +443,7 @@ exit:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
dccp_done(newsk);
goto exit;
@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -548,7 +551,7 @@ out:
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
void dccp_syn_ack_timeout(const struct request_sock *req)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 51cdfc3..4e40db0 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
* Ack vectors are processed by the TX CCID if it is
* interested. The RX CCID need not parse Ack Vectors,
* since it is only interested in clearing old state.
- * Fall through.
*/
+ /* fall through */
case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
pkt_type, opt, value, len))
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3a2c340..b50a873 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk)
__sk_dst_reset(sk);
}
-static void dccp_write_timer(unsigned long data)
+static void dccp_write_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_retransmit_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
int event = 0;
bh_lock_sock(sk);
@@ -161,19 +162,20 @@ out:
sock_put(sk);
}
-static void dccp_keepalive_timer(unsigned long data)
+static void dccp_keepalive_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
pr_err("dccp should not use a keepalive timer !\n");
sock_put(sk);
}
/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(unsigned long data)
+static void dccp_delack_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_delack_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
@@ -232,10 +234,13 @@ static void dccp_write_xmitlet(unsigned long data)
bh_unlock_sock(sk);
}
-static void dccp_write_xmit_timer(unsigned long data)
+static void dccp_write_xmit_timer(struct timer_list *t)
{
- dccp_write_xmitlet(data);
- sock_put((struct sock *)data);
+ struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer);
+ struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
+
+ dccp_write_xmitlet((unsigned long)sk);
+ sock_put(sk);
}
void dccp_init_xmit_timers(struct sock *sk)
@@ -243,8 +248,7 @@ void dccp_init_xmit_timers(struct sock *sk)
struct dccp_sock *dp = dccp_sk(sk);
tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
- setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
- (unsigned long)sk);
+ timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0);
inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
&dccp_keepalive_timer);
}
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
index e44003a..9e38122 100644
--- a/net/decnet/Makefile
+++ b/net/decnet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DECNET) += decnet.o
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 73a0399..518cea1 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
scp->keepalive = 10 * HZ;
scp->keepalive_fxn = dn_keepalive;
- init_timer(&scp->delack_timer);
- scp->delack_pending = 0;
- scp->delack_fxn = dn_nsp_delayed_ack;
-
dn_start_slow_timer(sk);
out:
return sk;
@@ -634,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk)
goto disc_reject;
case DN_RUN:
scp->state = DN_DI;
+ /* fall through */
case DN_DI:
case DN_DR:
disc_reject:
dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
+ /* fall through */
case DN_NC:
case DN_NR:
case DN_RJ:
@@ -651,6 +649,7 @@ disc_reject:
break;
default:
printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
+ /* fall through */
case DN_O:
dn_stop_slow_timer(sk);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4d339de..9153247 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
@@ -1038,14 +1039,14 @@ static void dn_eth_down(struct net_device *dev)
static void dn_dev_set_timer(struct net_device *dev);
-static void dn_dev_timer_func(unsigned long arg)
+static void dn_dev_timer_func(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
- struct dn_dev *dn_db;
+ struct dn_dev *dn_db = from_timer(dn_db, t, timer);
+ struct net_device *dev;
struct dn_ifaddr *ifa;
rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
+ dev = dn_db->dev;
if (dn_db->t3 <= dn_db->parms.t2) {
if (dn_db->parms.timer3) {
for (ifa = rcu_dereference(dn_db->ifa_list);
@@ -1070,8 +1071,6 @@ static void dn_dev_set_timer(struct net_device *dev)
if (dn_db->parms.t2 > dn_db->parms.t3)
dn_db->parms.t2 = dn_db->parms.t3;
- dn_db->timer.data = (unsigned long)dev;
- dn_db->timer.function = dn_dev_timer_func;
dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
add_timer(&dn_db->timer);
@@ -1100,7 +1099,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
rcu_assign_pointer(dev->dn_ptr, dn_db);
dn_db->dev = dev;
- init_timer(&dn_db->timer);
+ timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
dn_db->uptime = jiffies;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 3d37464..b37a1b8 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 22bf0b9..528119a 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 7ac086d..1b212064 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -776,12 +776,8 @@ static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
* Swap src & dst and look up in the normal way.
*/
if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
- __le16 tmp = cb->dst_port;
- cb->dst_port = cb->src_port;
- cb->src_port = tmp;
- tmp = cb->dst;
- cb->dst = cb->src;
- cb->src = tmp;
+ swap(cb->dst_port, cb->src_port);
+ swap(cb->dst, cb->src);
}
/*
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 66f035e..56a52a0 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -313,11 +313,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c
ackcrs |= 0x8000;
/* If this is an "other data/ack" message, swap acknum and ackcrs */
- if (other) {
- unsigned short tmp = acknum;
- acknum = ackcrs;
- ackcrs = tmp;
- }
+ if (other)
+ swap(acknum, ackcrs);
/* Set "cross subchannel" bit in ackcrs */
ackcrs |= 0x2000;
@@ -491,17 +488,6 @@ void dn_send_conn_ack (struct sock *sk)
dn_nsp_send(skb);
}
-void dn_nsp_delayed_ack(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->ackxmt_oth != scp->numoth_rcv)
- dn_nsp_send_oth_ack(sk);
-
- if (scp->ackxmt_dat != scp->numdat_rcv)
- dn_nsp_send_data_ack(sk);
-}
-
static int dn_nsp_retrans_conn_conf(struct sock *sk)
{
struct dn_scp *scp = DN_SK(sk);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 0bd3afd..bff5ab8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
- dst_use(&rth->dst, now);
+ dst_hold_and_use(&rth->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
dst_release_immediate(&rt->dst);
@@ -351,7 +351,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
- dst_use(&rt->dst, now);
+ dst_hold_and_use(&rt->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
*rp = rt;
return 0;
@@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
(flp->flowidn_mark == rt->fld.flowidn_mark) &&
dn_is_output_route(rt) &&
(rt->fld.flowidn_oif == flp->flowidn_oif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock_bh();
*pprt = &rt->dst;
return 0;
@@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb)
(rt->fld.flowidn_oif == 0) &&
(rt->fld.flowidn_mark == skb->mark) &&
(rt->fld.flowidn_iif == cb->iif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock();
skb_dst_set(skb, (struct dst_entry *)rt);
return 0;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 295bbd6..c795c3f 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 2326754..08667f6 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
index 1d330fd..f430dae 100644
--- a/net/decnet/dn_timer.c
+++ b/net/decnet/dn_timer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 6c7da6c..55bf64a 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* DECnet An implementation of the DECnet protocol suite for the LINUX
* operating system. DECnet is implemented using the BSD Socket
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8737412..e1d4d89 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data)
static void dns_resolver_describe(const struct key *key, struct seq_file *m)
{
seq_puts(m, key->description);
- if (key_is_instantiated(key)) {
+ if (key_is_positive(key)) {
int err = PTR_ERR(key->payload.data[dns_key_error]);
if (err)
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 2e7ac8b..e9a4a0f 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 51ca2a5..b8f2d9f 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,6 +14,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
@@ -67,37 +68,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
[DSA_TAG_PROTO_NONE] = &none_ops,
};
-int dsa_cpu_dsa_setup(struct dsa_port *port)
-{
- struct device_node *port_dn = port->dn;
- struct dsa_switch *ds = port->ds;
- struct phy_device *phydev;
- int ret, mode;
-
- if (of_phy_is_fixed_link(port_dn)) {
- ret = of_phy_register_fixed_link(port_dn);
- if (ret) {
- dev_err(ds->dev, "failed to register fixed PHY\n");
- return ret;
- }
- phydev = of_phy_find_device(port_dn);
-
- mode = of_get_phy_mode(port_dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
-
- genphy_config_init(phydev);
- genphy_read_status(phydev);
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port->index, phydev);
-
- put_device(&phydev->mdio.dev);
- }
-
- return 0;
-}
-
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
{
const struct dsa_device_ops *ops;
@@ -112,14 +82,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
return ops;
}
-void dsa_cpu_dsa_destroy(struct dsa_port *port)
-{
- struct device_node *port_dn = port->dn;
-
- if (of_phy_is_fixed_link(port_dn))
- of_phy_deregister_fixed_link(port_dn);
-}
-
static int dev_is_class(struct device *dev, void *class)
{
if (dev->class != NULL && !strcmp(dev->class->name, class))
@@ -200,7 +162,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_PM_SLEEP
static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
{
- return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
+ return dsa_is_user_port(ds, p) && ds->ports[p].slave;
}
int dsa_switch_suspend(struct dsa_switch *ds)
@@ -212,7 +174,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_suspend(ds->ports[i].netdev);
+ ret = dsa_slave_suspend(ds->ports[i].slave);
if (ret)
return ret;
}
@@ -239,7 +201,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_resume(ds->ports[i].netdev);
+ ret = dsa_slave_resume(ds->ports[i].slave);
if (ret)
return ret;
}
@@ -261,6 +223,28 @@ bool dsa_schedule_work(struct work_struct *work)
return queue_work(dsa_owq, work);
}
+static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain);
+
+int register_dsa_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_dsa_notifier);
+
+int unregister_dsa_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_dsa_notifier);
+
+int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ struct dsa_notifier_info *info)
+{
+ info->dev = dev;
+ return atomic_notifier_call_chain(&dsa_notif_chain, val, info);
+}
+EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+
static int __init dsa_init_module(void)
{
int rc;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 54ed054..283104e 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,65 +21,77 @@
#include "dsa_priv.h"
-static LIST_HEAD(dsa_switch_trees);
+static LIST_HEAD(dsa_tree_list);
static DEFINE_MUTEX(dsa2_mutex);
static const struct devlink_ops dsa_devlink_ops = {
};
-static struct dsa_switch_tree *dsa_get_dst(u32 tree)
+static struct dsa_switch_tree *dsa_tree_find(int index)
{
struct dsa_switch_tree *dst;
- list_for_each_entry(dst, &dsa_switch_trees, list)
- if (dst->tree == tree) {
- kref_get(&dst->refcount);
+ list_for_each_entry(dst, &dsa_tree_list, list)
+ if (dst->index == index)
return dst;
- }
+
return NULL;
}
-static void dsa_free_dst(struct kref *ref)
+static struct dsa_switch_tree *dsa_tree_alloc(int index)
{
- struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree,
- refcount);
+ struct dsa_switch_tree *dst;
- list_del(&dst->list);
- kfree(dst);
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return NULL;
+
+ dst->index = index;
+
+ INIT_LIST_HEAD(&dst->list);
+ list_add_tail(&dsa_tree_list, &dst->list);
+
+ /* Initialize the reference counter to the number of switches, not 1 */
+ kref_init(&dst->refcount);
+ refcount_set(&dst->refcount.refcount, 0);
+
+ return dst;
}
-static void dsa_put_dst(struct dsa_switch_tree *dst)
+static void dsa_tree_free(struct dsa_switch_tree *dst)
{
- kref_put(&dst->refcount, dsa_free_dst);
+ list_del(&dst->list);
+ kfree(dst);
}
-static struct dsa_switch_tree *dsa_add_dst(u32 tree)
+static struct dsa_switch_tree *dsa_tree_touch(int index)
{
struct dsa_switch_tree *dst;
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = dsa_tree_find(index);
if (!dst)
- return NULL;
- dst->tree = tree;
- INIT_LIST_HEAD(&dst->list);
- list_add_tail(&dsa_switch_trees, &dst->list);
- kref_init(&dst->refcount);
+ dst = dsa_tree_alloc(index);
return dst;
}
-static void dsa_dst_add_ds(struct dsa_switch_tree *dst,
- struct dsa_switch *ds, u32 index)
+static void dsa_tree_get(struct dsa_switch_tree *dst)
{
kref_get(&dst->refcount);
- dst->ds[index] = ds;
}
-static void dsa_dst_del_ds(struct dsa_switch_tree *dst,
- struct dsa_switch *ds, u32 index)
+static void dsa_tree_release(struct kref *ref)
{
- dst->ds[index] = NULL;
- kref_put(&dst->refcount, dsa_free_dst);
+ struct dsa_switch_tree *dst;
+
+ dst = container_of(ref, struct dsa_switch_tree, refcount);
+
+ dsa_tree_free(dst);
+}
+
+static void dsa_tree_put(struct dsa_switch_tree *dst)
+{
+ kref_put(&dst->refcount, dsa_tree_release);
}
/* For platform data configurations, we need to have a valid name argument to
@@ -87,23 +99,17 @@ static void dsa_dst_del_ds(struct dsa_switch_tree *dst,
*/
static bool dsa_port_is_valid(struct dsa_port *port)
{
- return !!(port->dn || port->name);
+ return port->type != DSA_PORT_TYPE_UNUSED;
}
static bool dsa_port_is_dsa(struct dsa_port *port)
{
- if (port->name && !strcmp(port->name, "dsa"))
- return true;
- else
- return !!of_parse_phandle(port->dn, "link", 0);
+ return port->type == DSA_PORT_TYPE_DSA;
}
static bool dsa_port_is_cpu(struct dsa_port *port)
{
- if (port->name && !strcmp(port->name, "cpu"))
- return true;
- else
- return !!of_parse_phandle(port->dn, "ethernet", 0);
+ return port->type == DSA_PORT_TYPE_CPU;
}
static bool dsa_ds_find_port_dn(struct dsa_switch *ds,
@@ -183,8 +189,6 @@ static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds)
err = dsa_port_complete(dst, ds, port, index);
if (err != 0)
return err;
-
- ds->dsa_port_mask |= BIT(index);
}
return 0;
@@ -219,7 +223,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port)
struct dsa_switch *ds = port->ds;
int err;
- err = dsa_cpu_dsa_setup(port);
+ err = dsa_port_fixed_link_register_of(port);
if (err) {
dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n",
port->index, err);
@@ -235,7 +239,7 @@ static int dsa_dsa_port_apply(struct dsa_port *port)
static void dsa_dsa_port_unapply(struct dsa_port *port)
{
devlink_port_unregister(&port->devlink_port);
- dsa_cpu_dsa_destroy(port);
+ dsa_port_fixed_link_unregister_of(port);
}
static int dsa_cpu_port_apply(struct dsa_port *port)
@@ -243,7 +247,7 @@ static int dsa_cpu_port_apply(struct dsa_port *port)
struct dsa_switch *ds = port->ds;
int err;
- err = dsa_cpu_dsa_setup(port);
+ err = dsa_port_fixed_link_register_of(port);
if (err) {
dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n",
port->index, err);
@@ -259,27 +263,19 @@ static int dsa_cpu_port_apply(struct dsa_port *port)
static void dsa_cpu_port_unapply(struct dsa_port *port)
{
devlink_port_unregister(&port->devlink_port);
- dsa_cpu_dsa_destroy(port);
- port->ds->cpu_port_mask &= ~BIT(port->index);
-
+ dsa_port_fixed_link_unregister_of(port);
}
static int dsa_user_port_apply(struct dsa_port *port)
{
struct dsa_switch *ds = port->ds;
- const char *name = port->name;
int err;
- if (port->dn)
- name = of_get_property(port->dn, "label", NULL);
- if (!name)
- name = "eth%d";
-
- err = dsa_slave_create(port, name);
+ err = dsa_slave_create(port);
if (err) {
dev_warn(ds->dev, "Failed to create slave %d: %d\n",
port->index, err);
- port->netdev = NULL;
+ port->slave = NULL;
return err;
}
@@ -289,7 +285,7 @@ static int dsa_user_port_apply(struct dsa_port *port)
if (err)
return err;
- devlink_port_type_eth_set(&port->devlink_port, port->netdev);
+ devlink_port_type_eth_set(&port->devlink_port, port->slave);
return 0;
}
@@ -297,10 +293,9 @@ static int dsa_user_port_apply(struct dsa_port *port)
static void dsa_user_port_unapply(struct dsa_port *port)
{
devlink_port_unregister(&port->devlink_port);
- if (port->netdev) {
- dsa_slave_destroy(port->netdev);
- port->netdev = NULL;
- port->ds->enabled_port_mask &= ~(1 << port->index);
+ if (port->slave) {
+ dsa_slave_destroy(port->slave);
+ port->slave = NULL;
}
}
@@ -315,7 +310,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
* the slave MDIO bus driver rely on these values for probing PHY
* devices or not
*/
- ds->phys_mii_mask = ds->enabled_port_mask;
+ ds->phys_mii_mask |= dsa_user_ports(ds);
/* Add the switch to devlink before calling setup, so that setup can
* add dpipe tables
@@ -336,12 +331,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
if (err)
return err;
- if (ds->ops->set_addr) {
- err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr);
- if (err < 0)
- return err;
- }
-
if (!ds->slave_mii_bus && ds->ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
@@ -438,9 +427,9 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
* sent to the tag format's receive function.
*/
wmb();
- dst->cpu_dp->netdev->dsa_ptr = dst->cpu_dp;
+ dst->cpu_dp->master->dsa_ptr = dst->cpu_dp;
- err = dsa_master_ethtool_setup(dst->cpu_dp->netdev);
+ err = dsa_master_ethtool_setup(dst->cpu_dp->master);
if (err)
return err;
@@ -457,9 +446,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
if (!dst->applied)
return;
- dsa_master_ethtool_restore(dst->cpu_dp->netdev);
+ dsa_master_ethtool_restore(dst->cpu_dp->master);
- dst->cpu_dp->netdev->dsa_ptr = NULL;
+ dst->cpu_dp->master->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point get sent
@@ -477,56 +466,78 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
dst->cpu_dp = NULL;
- pr_info("DSA: tree %d unapplied\n", dst->tree);
+ pr_info("DSA: tree %d unapplied\n", dst->index);
dst->applied = false;
}
-static int dsa_cpu_parse(struct dsa_port *port, u32 index,
- struct dsa_switch_tree *dst,
- struct dsa_switch *ds)
+static void dsa_tree_remove_switch(struct dsa_switch_tree *dst,
+ unsigned int index)
{
- const struct dsa_device_ops *tag_ops;
- enum dsa_tag_protocol tag_protocol;
- struct net_device *ethernet_dev;
- struct device_node *ethernet;
+ dst->ds[index] = NULL;
+ dsa_tree_put(dst);
+}
- if (port->dn) {
- ethernet = of_parse_phandle(port->dn, "ethernet", 0);
- if (!ethernet)
- return -EINVAL;
- ethernet_dev = of_find_net_device_by_node(ethernet);
- } else {
- ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
- dev_put(ethernet_dev);
- }
+static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
+ struct dsa_switch *ds)
+{
+ unsigned int index = ds->index;
- if (!ethernet_dev)
- return -EPROBE_DEFER;
+ if (dst->ds[index])
+ return -EBUSY;
- if (!dst->cpu_dp) {
- dst->cpu_dp = port;
- dst->cpu_dp->netdev = ethernet_dev;
- }
+ dsa_tree_get(dst);
+ dst->ds[index] = ds;
- /* Initialize cpu_port_mask now for drv->setup()
- * to have access to a correct value, just like what
- * net/dsa/dsa.c::dsa_switch_setup_one does.
- */
- ds->cpu_port_mask |= BIT(index);
+ return 0;
+}
+
+static int dsa_port_parse_user(struct dsa_port *dp, const char *name)
+{
+ if (!name)
+ name = "eth%d";
+
+ dp->type = DSA_PORT_TYPE_USER;
+ dp->name = name;
+
+ return 0;
+}
+
+static int dsa_port_parse_dsa(struct dsa_port *dp)
+{
+ dp->type = DSA_PORT_TYPE_DSA;
+
+ return 0;
+}
+
+static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_switch_tree *dst = ds->dst;
+ const struct dsa_device_ops *tag_ops;
+ enum dsa_tag_protocol tag_protocol;
tag_protocol = ds->ops->get_tag_protocol(ds);
tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(tag_ops)) {
dev_warn(ds->dev, "No tagger for this switch\n");
- ds->cpu_port_mask &= ~BIT(index);
return PTR_ERR(tag_ops);
}
- dst->cpu_dp->tag_ops = tag_ops;
+ dp->type = DSA_PORT_TYPE_CPU;
+ dp->rcv = tag_ops->rcv;
+ dp->tag_ops = tag_ops;
+ dp->master = master;
+ dp->dst = dst;
+
+ return 0;
+}
- /* Make a few copies for faster access in master receive hot path */
- dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
- dst->cpu_dp->dst = dst;
+static int dsa_cpu_parse(struct dsa_port *port, u32 index,
+ struct dsa_switch_tree *dst,
+ struct dsa_switch *ds)
+{
+ if (!dst->cpu_dp)
+ dst->cpu_dp = port;
return 0;
}
@@ -547,17 +558,11 @@ static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds)
err = dsa_cpu_parse(port, index, dst, ds);
if (err)
return err;
- } else {
- /* Initialize enabled_port_mask now for drv->setup()
- * to have access to a correct value, just like what
- * net/dsa/dsa.c::dsa_switch_setup_one does.
- */
- ds->enabled_port_mask |= BIT(index);
}
}
- pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index);
+ pr_info("DSA: switch %d %d parsed\n", dst->index, ds->index);
return 0;
}
@@ -602,16 +607,48 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst)
}
}
- pr_info("DSA: tree %d parsed\n", dst->tree);
+ pr_info("DSA: tree %d parsed\n", dst->index);
return 0;
}
-static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
+static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
{
- struct device_node *port;
- int err;
+ struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0);
+ const char *name = of_get_property(dn, "label", NULL);
+ bool link = of_property_read_bool(dn, "link");
+
+ dp->dn = dn;
+
+ if (ethernet) {
+ struct net_device *master;
+
+ master = of_find_net_device_by_node(ethernet);
+ if (!master)
+ return -EPROBE_DEFER;
+
+ return dsa_port_parse_cpu(dp, master);
+ }
+
+ if (link)
+ return dsa_port_parse_dsa(dp);
+
+ return dsa_port_parse_user(dp, name);
+}
+
+static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
+ struct device_node *dn)
+{
+ struct device_node *ports, *port;
+ struct dsa_port *dp;
u32 reg;
+ int err;
+
+ ports = of_get_child_by_name(dn, "ports");
+ if (!ports) {
+ dev_err(ds->dev, "no ports child node found\n");
+ return -EINVAL;
+ }
for_each_available_child_of_node(ports, port) {
err = of_property_read_u32(port, "reg", &reg);
@@ -621,79 +658,114 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds)
if (reg >= ds->num_ports)
return -EINVAL;
- ds->ports[reg].dn = port;
+ dp = &ds->ports[reg];
+
+ err = dsa_port_parse_of(dp, port);
+ if (err)
+ return err;
}
return 0;
}
-static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds)
+static int dsa_switch_parse_member_of(struct dsa_switch *ds,
+ struct device_node *dn)
{
- bool valid_name_found = false;
- unsigned int i;
+ u32 m[2] = { 0, 0 };
+ int sz;
- for (i = 0; i < DSA_MAX_PORTS; i++) {
- if (!cd->port_names[i])
- continue;
+ /* Don't error out if this optional property isn't found */
+ sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2);
+ if (sz < 0 && sz != -EINVAL)
+ return sz;
- ds->ports[i].name = cd->port_names[i];
- valid_name_found = true;
- }
-
- if (!valid_name_found && i == DSA_MAX_PORTS)
+ ds->index = m[1];
+ if (ds->index >= DSA_MAX_SWITCHES)
return -EINVAL;
+ ds->dst = dsa_tree_touch(m[0]);
+ if (!ds->dst)
+ return -ENOMEM;
+
return 0;
}
-static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index)
+static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn)
{
int err;
- *tree = *index = 0;
-
- err = of_property_read_u32_index(np, "dsa,member", 0, tree);
- if (err) {
- /* Does not exist, but it is optional */
- if (err == -EINVAL)
- return 0;
- return err;
- }
-
- err = of_property_read_u32_index(np, "dsa,member", 1, index);
+ err = dsa_switch_parse_member_of(ds, dn);
if (err)
return err;
- if (*index >= DSA_MAX_SWITCHES)
- return -EINVAL;
+ return dsa_switch_parse_ports_of(ds, dn);
+}
- return 0;
+static int dsa_port_parse(struct dsa_port *dp, const char *name,
+ struct device *dev)
+{
+ if (!strcmp(name, "cpu")) {
+ struct net_device *master;
+
+ master = dsa_dev_to_net_device(dev);
+ if (!master)
+ return -EPROBE_DEFER;
+
+ dev_put(master);
+
+ return dsa_port_parse_cpu(dp, master);
+ }
+
+ if (!strcmp(name, "dsa"))
+ return dsa_port_parse_dsa(dp);
+
+ return dsa_port_parse_user(dp, name);
}
-static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index)
+static int dsa_switch_parse_ports(struct dsa_switch *ds,
+ struct dsa_chip_data *cd)
{
- if (!pd)
- return -ENODEV;
+ bool valid_name_found = false;
+ struct dsa_port *dp;
+ struct device *dev;
+ const char *name;
+ unsigned int i;
+ int err;
- /* We do not support complex trees with dsa_chip_data */
- *tree = 0;
- *index = 0;
+ for (i = 0; i < DSA_MAX_PORTS; i++) {
+ name = cd->port_names[i];
+ dev = cd->netdev[i];
+ dp = &ds->ports[i];
+
+ if (!name)
+ continue;
+
+ err = dsa_port_parse(dp, name, dev);
+ if (err)
+ return err;
+
+ valid_name_found = true;
+ }
+
+ if (!valid_name_found && i == DSA_MAX_PORTS)
+ return -EINVAL;
return 0;
}
-static struct device_node *dsa_get_ports(struct dsa_switch *ds,
- struct device_node *np)
+static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd)
{
- struct device_node *ports;
+ ds->cd = cd;
- ports = of_get_child_by_name(np, "ports");
- if (!ports) {
- dev_err(ds->dev, "no ports child node found\n");
- return ERR_PTR(-EINVAL);
- }
+ /* We don't support interconnected switches nor multiple trees via
+ * platform data, so this is the unique switch of the tree.
+ */
+ ds->index = 0;
+ ds->dst = dsa_tree_touch(0);
+ if (!ds->dst)
+ return -ENOMEM;
- return ports;
+ return dsa_switch_parse_ports(ds, cd);
}
static int _dsa_register_switch(struct dsa_switch *ds)
@@ -701,63 +773,37 @@ static int _dsa_register_switch(struct dsa_switch *ds)
struct dsa_chip_data *pdata = ds->dev->platform_data;
struct device_node *np = ds->dev->of_node;
struct dsa_switch_tree *dst;
- struct device_node *ports;
- u32 tree, index;
+ unsigned int index;
int i, err;
- if (np) {
- err = dsa_parse_member_dn(np, &tree, &index);
- if (err)
- return err;
-
- ports = dsa_get_ports(ds, np);
- if (IS_ERR(ports))
- return PTR_ERR(ports);
-
- err = dsa_parse_ports_dn(ports, ds);
- if (err)
- return err;
- } else {
- err = dsa_parse_member(pdata, &tree, &index);
- if (err)
- return err;
-
- err = dsa_parse_ports(pdata, ds);
- if (err)
- return err;
- }
-
- dst = dsa_get_dst(tree);
- if (!dst) {
- dst = dsa_add_dst(tree);
- if (!dst)
- return -ENOMEM;
- }
+ if (np)
+ err = dsa_switch_parse_of(ds, np);
+ else if (pdata)
+ err = dsa_switch_parse(ds, pdata);
+ else
+ err = -ENODEV;
- if (dst->ds[index]) {
- err = -EBUSY;
- goto out;
- }
+ if (err)
+ return err;
- ds->dst = dst;
- ds->index = index;
- ds->cd = pdata;
+ index = ds->index;
+ dst = ds->dst;
/* Initialize the routing table */
for (i = 0; i < DSA_MAX_SWITCHES; ++i)
ds->rtable[i] = DSA_RTABLE_NONE;
- dsa_dst_add_ds(dst, ds, index);
+ err = dsa_tree_add_switch(dst, ds);
+ if (err)
+ return err;
err = dsa_dst_complete(dst);
if (err < 0)
goto out_del_dst;
- if (err == 1) {
- /* Not all switches registered yet */
- err = 0;
- goto out;
- }
+ /* Not all switches registered yet */
+ if (err == 1)
+ return 0;
if (dst->applied) {
pr_info("DSA: Disjoint trees?\n");
@@ -765,14 +811,8 @@ static int _dsa_register_switch(struct dsa_switch *ds)
}
err = dsa_dst_parse(dst);
- if (err) {
- if (err == -EPROBE_DEFER) {
- dsa_dst_del_ds(dst, ds, ds->index);
- return err;
- }
-
+ if (err)
goto out_del_dst;
- }
err = dsa_dst_apply(dst);
if (err) {
@@ -780,13 +820,10 @@ static int _dsa_register_switch(struct dsa_switch *ds)
goto out_del_dst;
}
- dsa_put_dst(dst);
return 0;
out_del_dst:
- dsa_dst_del_ds(dst, ds, ds->index);
-out:
- dsa_put_dst(dst);
+ dsa_tree_remove_switch(dst, index);
return err;
}
@@ -828,10 +865,11 @@ EXPORT_SYMBOL_GPL(dsa_register_switch);
static void _dsa_unregister_switch(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
+ unsigned int index = ds->index;
dsa_dst_unapply(dst);
- dsa_dst_del_ds(dst, ds, ds->index);
+ dsa_tree_remove_switch(dst, index);
}
void dsa_unregister_switch(struct dsa_switch *ds)
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2850077c..253a613 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -93,8 +93,6 @@ struct dsa_slave_priv {
};
/* dsa.c */
-int dsa_cpu_dsa_setup(struct dsa_port *port);
-void dsa_cpu_dsa_destroy(struct dsa_port *dport);
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
bool dsa_schedule_work(struct work_struct *work);
@@ -113,8 +111,8 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
int dsa_master_ethtool_setup(struct net_device *dev);
void dsa_master_ethtool_restore(struct net_device *dev);
-static inline struct net_device *dsa_master_get_slave(struct net_device *dev,
- int device, int port)
+static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
+ int device, int port)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
struct dsa_switch_tree *dst = cpu_dp->dst;
@@ -130,7 +128,7 @@ static inline struct net_device *dsa_master_get_slave(struct net_device *dev,
if (port < 0 || port >= ds->num_ports)
return NULL;
- return ds->ports[port].netdev;
+ return ds->ports[port].slave;
}
/* port.c */
@@ -159,16 +157,34 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
+int dsa_port_fixed_link_register_of(struct dsa_port *dp);
+void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-int dsa_slave_create(struct dsa_port *port, const char *name);
+int dsa_slave_create(struct dsa_port *dp);
void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
+static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
+ return p->dp;
+}
+
+static inline struct net_device *
+dsa_slave_to_master(const struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dp->cpu_dp->master;
+}
+
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
@@ -197,9 +213,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
/* tag_trailer.c */
extern const struct dsa_device_ops trailer_netdev_ops;
-static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
-{
- return p->dp->cpu_dp->netdev;
-}
-
#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 19ff6e0..0511fe2 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- ret = dsa_cpu_dsa_setup(&ds->ports[port]);
+ ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
if (ret)
return ret;
}
@@ -101,6 +101,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
struct dsa_chip_data *cd = ds->cd;
bool valid_name_found = false;
int index = ds->index;
+ struct dsa_port *dp;
int i, ret;
/*
@@ -109,9 +110,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
for (i = 0; i < ds->num_ports; i++) {
char *name;
+ dp = &ds->ports[i];
+
name = cd->port_names[i];
if (name == NULL)
continue;
+ dp->name = name;
if (!strcmp(name, "cpu")) {
if (dst->cpu_dp) {
@@ -120,12 +124,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
return -EINVAL;
}
dst->cpu_dp = &ds->ports[i];
- dst->cpu_dp->netdev = master;
- ds->cpu_port_mask |= 1 << i;
+ dst->cpu_dp->master = master;
+ dp->type = DSA_PORT_TYPE_CPU;
} else if (!strcmp(name, "dsa")) {
- ds->dsa_port_mask |= 1 << i;
+ dp->type = DSA_PORT_TYPE_DSA;
} else {
- ds->enabled_port_mask |= 1 << i;
+ dp->type = DSA_PORT_TYPE_USER;
}
valid_name_found = true;
}
@@ -136,7 +140,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
/* Make the built-in MII bus mask match the number of ports,
* switch drivers can override this later
*/
- ds->phys_mii_mask = ds->enabled_port_mask;
+ ds->phys_mii_mask |= dsa_user_ports(ds);
/*
* If the CPU connects to this switch, set the switch tree
@@ -172,12 +176,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
if (ret)
return ret;
- if (ops->set_addr) {
- ret = ops->set_addr(ds, master->dev_addr);
- if (ret < 0)
- return ret;
- }
-
if (!ds->slave_mii_bus && ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
@@ -196,10 +194,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
ds->ports[i].dn = cd->port_dn[i];
ds->ports[i].cpu_dp = dst->cpu_dp;
- if (!(ds->enabled_port_mask & (1 << i)))
+ if (dsa_is_user_port(ds, i))
continue;
- ret = dsa_slave_create(&ds->ports[i], cd->port_names[i]);
+ ret = dsa_slave_create(&ds->ports[i]);
if (ret < 0)
netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
index, i, cd->port_names[i], ret);
@@ -264,24 +262,20 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
/* Destroy network devices for physical switch ports. */
for (port = 0; port < ds->num_ports; port++) {
- if (!(ds->enabled_port_mask & (1 << port)))
+ if (!dsa_is_user_port(ds, port))
continue;
- if (!ds->ports[port].netdev)
+ if (!ds->ports[port].slave)
continue;
- dsa_slave_destroy(ds->ports[port].netdev);
+ dsa_slave_destroy(ds->ports[port].slave);
}
/* Disable configuration of the CPU and DSA ports */
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dsa_cpu_dsa_destroy(&ds->ports[port]);
-
- /* Clearing a bit which is not set does no harm */
- ds->cpu_port_mask |= ~(1 << port);
- ds->dsa_port_mask |= ~(1 << port);
+ dsa_port_fixed_link_unregister_of(&ds->ports[port]);
}
if (ds->slave_mii_bus && ds->ops->phy_read)
@@ -607,7 +601,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
wmb();
dev->dsa_ptr = dst->cpu_dp;
- return dsa_master_ethtool_setup(dst->cpu_dp->netdev);
+ return dsa_master_ethtool_setup(dst->cpu_dp->master);
}
static int dsa_probe(struct platform_device *pdev)
@@ -672,9 +666,9 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
- dsa_master_ethtool_restore(dst->cpu_dp->netdev);
+ dsa_master_ethtool_restore(dst->cpu_dp->master);
- dst->cpu_dp->netdev->dsa_ptr = NULL;
+ dst->cpu_dp->master->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point get sent
@@ -689,7 +683,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
- dev_put(dst->cpu_dp->netdev);
+ dev_put(dst->cpu_dp->master);
}
static int dsa_remove(struct platform_device *pdev)
@@ -746,8 +740,7 @@ int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
const unsigned char *addr, u16 vid,
u16 flags)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
return dsa_port_fdb_add(dp, addr, vid);
}
@@ -756,8 +749,7 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
return dsa_port_fdb_del(dp, addr, vid);
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 72c8dbd3..bb30b1a 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -12,6 +12,8 @@
#include <linux/if_bridge.h>
#include <linux/notifier.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
#include "dsa_priv.h"
@@ -264,3 +266,48 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
}
+
+int dsa_port_fixed_link_register_of(struct dsa_port *dp)
+{
+ struct device_node *dn = dp->dn;
+ struct dsa_switch *ds = dp->ds;
+ struct phy_device *phydev;
+ int port = dp->index;
+ int mode;
+ int err;
+
+ if (of_phy_is_fixed_link(dn)) {
+ err = of_phy_register_fixed_link(dn);
+ if (err) {
+ dev_err(ds->dev,
+ "failed to register the fixed PHY of port %d\n",
+ port);
+ return err;
+ }
+
+ phydev = of_phy_find_device(dn);
+
+ mode = of_get_phy_mode(dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
+
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
+
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
+
+ put_device(&phydev->mdio.dev);
+ }
+
+ return 0;
+}
+
+void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
+{
+ struct device_node *dn = dp->dn;
+
+ if (of_phy_is_fixed_link(dn))
+ of_phy_deregister_fixed_link(dn);
+}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 4b634db..814ced7 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
ds->slave_mii_bus->read = dsa_slave_phy_read;
ds->slave_mii_bus->write = dsa_slave_phy_write;
snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d",
- ds->dst->tree, ds->index);
+ ds->dst->index, ds->index);
ds->slave_mii_bus->parent = ds->dev;
ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask;
}
@@ -64,16 +64,13 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
/* slave device handling ****************************************************/
static int dsa_slave_get_iflink(const struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- return dsa_master_netdev(p)->ifindex;
+ return dsa_slave_to_master(dev)->ifindex;
}
static int dsa_slave_open(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
if (!(master->flags & IFF_UP))
@@ -120,9 +117,8 @@ out:
static int dsa_slave_close(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
- struct dsa_port *dp = p->dp;
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
if (dev->phydev)
phy_stop(dev->phydev);
@@ -144,8 +140,7 @@ static int dsa_slave_close(struct net_device *dev)
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
if (change & IFF_ALLMULTI)
dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -155,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
static void dsa_slave_set_rx_mode(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
dev_mc_sync(master, dev);
dev_uc_sync(master, dev);
@@ -164,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
struct sockaddr *addr = a;
int err;
@@ -246,14 +239,13 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *filter_dev,
int *idx)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_dump_ctx dump = {
.dev = dev,
.skb = skb,
.cb = cb,
.idx = *idx,
};
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
int err;
err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
@@ -274,8 +266,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int ret;
switch (attr->id) {
@@ -301,8 +292,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
/* For the prepare phase, ensure the full set of changes is feasable in
@@ -329,8 +319,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
static int dsa_slave_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
switch (obj->id) {
@@ -351,8 +340,8 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
static int dsa_slave_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
@@ -413,7 +402,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
/* Queue the SKB for transmission on the parent interface, but
* do not modify its EtherType
*/
- nskb->dev = dsa_master_netdev(p);
+ nskb->dev = dsa_slave_to_master(dev);
dev_queue_xmit(nskb);
return NETDEV_TX_OK;
@@ -431,11 +420,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
static int dsa_slave_get_regs_len(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs_len)
- return ds->ops->get_regs_len(ds, p->dp->index);
+ return ds->ops->get_regs_len(ds, dp->index);
return -EOPNOTSUPP;
}
@@ -443,11 +432,11 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
static void
dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs)
- ds->ops->get_regs(ds, p->dp->index, regs, _p);
+ ds->ops->get_regs(ds, dp->index, regs, _p);
}
static u32 dsa_slave_get_link(struct net_device *dev)
@@ -462,8 +451,8 @@ static u32 dsa_slave_get_link(struct net_device *dev)
static int dsa_slave_get_eeprom_len(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->cd && ds->cd->eeprom_len)
return ds->cd->eeprom_len;
@@ -477,8 +466,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
static int dsa_slave_get_eeprom(struct net_device *dev,
struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_eeprom)
return ds->ops->get_eeprom(ds, eeprom, data);
@@ -489,8 +478,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
static int dsa_slave_set_eeprom(struct net_device *dev,
struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->set_eeprom)
return ds->ops->set_eeprom(ds, eeprom, data);
@@ -501,8 +490,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
static void dsa_slave_get_strings(struct net_device *dev,
uint32_t stringset, uint8_t *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (stringset == ETH_SS_STATS) {
int len = ETH_GSTRING_LEN;
@@ -512,7 +501,7 @@ static void dsa_slave_get_strings(struct net_device *dev,
strncpy(data + 2 * len, "rx_packets", len);
strncpy(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
- ds->ops->get_strings(ds, p->dp->index, data + 4 * len);
+ ds->ops->get_strings(ds, dp->index, data + 4 * len);
}
}
@@ -520,8 +509,9 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats,
uint64_t *data)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
struct pcpu_sw_netstats *s;
unsigned int start;
int i;
@@ -543,13 +533,13 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
data[3] += rx_bytes;
}
if (ds->ops->get_ethtool_stats)
- ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4);
+ ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
}
static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (sset == ETH_SS_STATS) {
int count;
@@ -566,29 +556,29 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_wol)
- ds->ops->get_wol(ds, p->dp->index, w);
+ ds->ops->get_wol(ds, dp->index, w);
}
static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret = -EOPNOTSUPP;
if (ds->ops->set_wol)
- ret = ds->ops->set_wol(ds, p->dp->index, w);
+ ret = ds->ops->set_wol(ds, dp->index, w);
return ret;
}
static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret;
/* Port's PHY and MAC both need to be EEE capable */
@@ -598,7 +588,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
if (!ds->ops->set_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->set_mac_eee(ds, p->dp->index, e);
+ ret = ds->ops->set_mac_eee(ds, dp->index, e);
if (ret)
return ret;
@@ -613,8 +603,8 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret;
/* Port's PHY and MAC both need to be EEE capable */
@@ -624,7 +614,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
if (!ds->ops->get_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->get_mac_eee(ds, p->dp->index, e);
+ ret = ds->ops->get_mac_eee(ds, dp->index, e);
if (ret)
return ret;
@@ -635,8 +625,8 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
static int dsa_slave_netpoll_setup(struct net_device *dev,
struct netpoll_info *ni)
{
+ struct net_device *master = dsa_slave_to_master(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
struct netpoll *netpoll;
int err = 0;
@@ -676,9 +666,9 @@ static void dsa_slave_poll_controller(struct net_device *dev)
static int dsa_slave_get_phys_port_name(struct net_device *dev,
char *name, size_t len)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
- if (snprintf(name, len, "p%d", p->dp->index) >= len)
+ if (snprintf(name, len, "p%d", dp->index) >= len)
return -EINVAL;
return 0;
@@ -701,14 +691,15 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
struct tc_cls_matchall_offload *cls,
bool ingress)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
__be16 protocol = cls->common.protocol;
- struct dsa_switch *ds = p->dp->ds;
struct net *net = dev_net(dev);
- struct dsa_slave_priv *to_p;
+ struct dsa_switch *ds = dp->ds;
struct net_device *to_dev;
const struct tc_action *a;
+ struct dsa_port *to_dp;
int err = -EOPNOTSUPP;
LIST_HEAD(actions);
int ifindex;
@@ -741,13 +732,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
mirror = &mall_tc_entry->mirror;
- to_p = netdev_priv(to_dev);
+ to_dp = dsa_slave_to_port(to_dev);
- mirror->to_local_port = to_p->dp->index;
+ mirror->to_local_port = to_dp->index;
mirror->ingress = ingress;
- err = ds->ops->port_mirror_add(ds, p->dp->index, mirror,
- ingress);
+ err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress);
if (err) {
kfree(mall_tc_entry);
return err;
@@ -762,9 +752,9 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
static void dsa_slave_del_cls_matchall(struct net_device *dev,
struct tc_cls_matchall_offload *cls)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->port_mirror_del)
return;
@@ -777,8 +767,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
switch (mall_tc_entry->type) {
case DSA_PORT_MALL_MIRROR:
- ds->ops->port_mirror_del(ds, p->dp->index,
- &mall_tc_entry->mirror);
+ ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror);
break;
default:
WARN_ON(1);
@@ -788,17 +777,9 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
}
static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
- struct tc_cls_matchall_offload *cls)
+ struct tc_cls_matchall_offload *cls,
+ bool ingress)
{
- bool ingress;
-
- if (is_classid_clsact_ingress(cls->common.classid))
- ingress = true;
- else if (is_classid_clsact_egress(cls->common.classid))
- ingress = false;
- else
- return -EOPNOTSUPP;
-
if (cls->common.chain_index)
return -EOPNOTSUPP;
@@ -813,12 +794,63 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
}
}
+static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv, bool ingress)
+{
+ struct net_device *dev = cb_priv;
+
+ if (!tc_can_offload(dev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true);
+}
+
+static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
+}
+
+static int dsa_slave_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ tc_setup_cb_t *cb;
+
+ if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ cb = dsa_slave_setup_tc_block_cb_ig;
+ else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ cb = dsa_slave_setup_tc_block_cb_eg;
+ else
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, cb, dev, dev);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, cb, dev);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
switch (type) {
- case TC_SETUP_CLSMATCHALL:
- return dsa_slave_setup_tc_cls_matchall(dev, type_data);
+ case TC_SETUP_BLOCK:
+ return dsa_slave_setup_tc_block(dev, type_data);
default:
return -EOPNOTSUPP;
}
@@ -855,25 +887,25 @@ static void dsa_slave_get_stats64(struct net_device *dev,
static int dsa_slave_get_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc, u32 *rule_locs)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->get_rxnfc)
return -EOPNOTSUPP;
- return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs);
+ return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs);
}
static int dsa_slave_set_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->set_rxnfc)
return -EOPNOTSUPP;
- return ds->ops->set_rxnfc(ds, p->dp->index, nfc);
+ return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
static const struct ethtool_ops dsa_slave_ethtool_ops = {
@@ -933,8 +965,9 @@ static struct device_type dsa_type = {
static void dsa_slave_adjust_link(struct net_device *dev)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
unsigned int status_changed = 0;
if (p->old_link != dev->phydev->link) {
@@ -953,7 +986,7 @@ static void dsa_slave_adjust_link(struct net_device *dev)
}
if (ds->ops->adjust_link && status_changed)
- ds->ops->adjust_link(ds, p->dp->index, dev->phydev);
+ ds->ops->adjust_link(ds, dp->index, dev->phydev);
if (status_changed)
phy_print_status(dev->phydev);
@@ -962,14 +995,14 @@ static void dsa_slave_adjust_link(struct net_device *dev)
static int dsa_slave_fixed_link_update(struct net_device *dev,
struct fixed_phy_status *status)
{
- struct dsa_slave_priv *p;
struct dsa_switch *ds;
+ struct dsa_port *dp;
if (dev) {
- p = netdev_priv(dev);
- ds = p->dp->ds;
+ dp = dsa_slave_to_port(dev);
+ ds = dp->ds;
if (ds->ops->fixed_link_update)
- ds->ops->fixed_link_update(ds, p->dp->index, status);
+ ds->ops->fixed_link_update(ds, dp->index, status);
}
return 0;
@@ -978,8 +1011,9 @@ static int dsa_slave_fixed_link_update(struct net_device *dev,
/* slave device setup *******************************************************/
static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
{
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
struct dsa_slave_priv *p = netdev_priv(slave_dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
if (!slave_dev->phydev) {
@@ -997,14 +1031,15 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
static int dsa_slave_phy_setup(struct net_device *slave_dev)
{
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
struct dsa_slave_priv *p = netdev_priv(slave_dev);
- struct dsa_switch *ds = p->dp->ds;
- struct device_node *phy_dn, *port_dn;
+ struct device_node *port_dn = dp->dn;
+ struct dsa_switch *ds = dp->ds;
+ struct device_node *phy_dn;
bool phy_is_fixed = false;
u32 phy_flags = 0;
int mode, ret;
- port_dn = p->dp->dn;
mode = of_get_phy_mode(port_dn);
if (mode < 0)
mode = PHY_INTERFACE_MODE_NA;
@@ -1025,31 +1060,13 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
}
if (ds->ops->get_phy_flags)
- phy_flags = ds->ops->get_phy_flags(ds, p->dp->index);
+ phy_flags = ds->ops->get_phy_flags(ds, dp->index);
if (phy_dn) {
- int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
-
- /* If this PHY address is part of phys_mii_mask, which means
- * that we need to divert reads and writes to/from it, then we
- * want to bind this device using the slave MII bus created by
- * DSA to make that happen.
- */
- if (!phy_is_fixed && phy_id >= 0 &&
- (ds->phys_mii_mask & (1 << phy_id))) {
- ret = dsa_slave_phy_connect(slave_dev, phy_id);
- if (ret) {
- netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
- of_node_put(phy_dn);
- return ret;
- }
- } else {
- slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
- dsa_slave_adjust_link,
- phy_flags,
- p->phy_interface);
- }
-
+ slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
+ dsa_slave_adjust_link,
+ phy_flags,
+ p->phy_interface);
of_node_put(phy_dn);
}
@@ -1061,10 +1078,10 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
* MDIO bus instead
*/
if (!slave_dev->phydev) {
- ret = dsa_slave_phy_connect(slave_dev, p->dp->index);
+ ret = dsa_slave_phy_connect(slave_dev, dp->index);
if (ret) {
netdev_err(slave_dev, "failed to connect to port %d: %d\n",
- p->dp->index, ret);
+ dp->index, ret);
if (phy_is_fixed)
of_phy_deregister_fixed_link(port_dn);
return ret;
@@ -1114,18 +1131,30 @@ int dsa_slave_resume(struct net_device *slave_dev)
return 0;
}
-int dsa_slave_create(struct dsa_port *port, const char *name)
+static void dsa_slave_notify(struct net_device *dev, unsigned long val)
{
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_notifier_register_info rinfo = {
+ .switch_number = dp->ds->index,
+ .port_number = dp->index,
+ .master = master,
+ .info.dev = dev,
+ };
+
+ call_dsa_notifiers(val, dev, &rinfo.info);
+}
+
+int dsa_slave_create(struct dsa_port *port)
+{
+ struct dsa_port *cpu_dp = port->cpu_dp;
+ struct net_device *master = cpu_dp->master;
struct dsa_switch *ds = port->ds;
- struct net_device *master;
+ const char *name = port->name;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
- struct dsa_port *cpu_dp;
int ret;
- cpu_dp = ds->dst->cpu_dp;
- master = cpu_dp->netdev;
-
if (!ds->num_tx_queues)
ds->num_tx_queues = 1;
@@ -1167,37 +1196,43 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
p->old_link = -1;
p->old_duplex = -1;
- port->netdev = slave_dev;
- ret = register_netdev(slave_dev);
- if (ret) {
- netdev_err(master, "error %d registering interface %s\n",
- ret, slave_dev->name);
- port->netdev = NULL;
- free_percpu(p->stats64);
- free_netdev(slave_dev);
- return ret;
- }
+ port->slave = slave_dev;
netif_carrier_off(slave_dev);
ret = dsa_slave_phy_setup(slave_dev);
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
- unregister_netdev(slave_dev);
- free_percpu(p->stats64);
- free_netdev(slave_dev);
- return ret;
+ goto out_free;
+ }
+
+ dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
+
+ ret = register_netdev(slave_dev);
+ if (ret) {
+ netdev_err(master, "error %d registering interface %s\n",
+ ret, slave_dev->name);
+ goto out_phy;
}
return 0;
+
+out_phy:
+ phy_disconnect(slave_dev->phydev);
+ if (of_phy_is_fixed_link(port->dn))
+ of_phy_deregister_fixed_link(port->dn);
+out_free:
+ free_percpu(p->stats64);
+ free_netdev(slave_dev);
+ port->slave = NULL;
+ return ret;
}
void dsa_slave_destroy(struct net_device *slave_dev)
{
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
struct dsa_slave_priv *p = netdev_priv(slave_dev);
- struct device_node *port_dn;
-
- port_dn = p->dp->dn;
+ struct device_node *port_dn = dp->dn;
netif_carrier_off(slave_dev);
if (slave_dev->phydev) {
@@ -1206,6 +1241,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
if (of_phy_is_fixed_link(port_dn))
of_phy_deregister_fixed_link(port_dn);
}
+ dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
unregister_netdev(slave_dev);
free_percpu(p->stats64);
free_netdev(slave_dev);
@@ -1219,8 +1255,7 @@ static bool dsa_slave_dev_check(struct net_device *dev)
static int dsa_slave_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err = NOTIFY_DONE;
if (netif_is_bridge_master(info->upper_dev)) {
@@ -1263,14 +1298,14 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
container_of(work, struct dsa_switchdev_event_work, work);
struct net_device *dev = switchdev_work->dev;
struct switchdev_notifier_fdb_info *fdb_info;
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
- err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid);
+ err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
if (err) {
netdev_dbg(dev, "fdb add failed err=%d\n", err);
break;
@@ -1281,7 +1316,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
- err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid);
+ err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
if (err) {
netdev_dbg(dev, "fdb del failed err=%d\n", err);
dev_close(dev);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 8e4bdb9..9e082ba 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -61,7 +61,7 @@
static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 queue = skb_get_queue_mapping(skb);
u8 *brcm_tag;
@@ -82,9 +82,14 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
brcm_tag[1] = 0;
brcm_tag[2] = 0;
- if (p->dp->index == 8)
+ if (dp->index == 8)
brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
- brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK;
+ brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK;
+
+ /* Now tell the master network device about the desired output queue
+ * as well
+ */
+ skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue));
return skb;
}
@@ -114,7 +119,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Locate which port this is coming from */
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
- skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
if (!skb->dev)
return NULL;
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index c77218f..dbbcdaf 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -18,7 +18,7 @@
static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *dsa_header;
/*
@@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
* Construct tagged FROM_CPU DSA tag from 802.1q tag.
*/
dsa_header = skb->data + 2 * ETH_ALEN;
- dsa_header[0] = 0x60 | p->dp->ds->index;
- dsa_header[1] = p->dp->index << 3;
+ dsa_header[0] = 0x60 | dp->ds->index;
+ dsa_header[1] = dp->index << 3;
/*
* Move CFI field from byte 2 to byte 1.
@@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
* Construct untagged FROM_CPU DSA tag.
*/
dsa_header = skb->data + 2 * ETH_ALEN;
- dsa_header[0] = 0x40 | p->dp->ds->index;
- dsa_header[1] = p->dp->index << 3;
+ dsa_header[0] = 0x40 | dp->ds->index;
+ dsa_header[1] = dp->index << 3;
dsa_header[2] = 0x00;
dsa_header[3] = 0x00;
}
@@ -91,7 +91,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = dsa_header[0] & 0x1f;
source_port = (dsa_header[1] >> 3) & 0x1f;
- skb->dev = dsa_master_get_slave(dev, source_device, source_port);
+ skb->dev = dsa_master_find_slave(dev, source_device, source_port);
if (!skb->dev)
return NULL;
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 0b83cbe..f38a626 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -19,7 +19,7 @@
static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *edsa_header;
/*
@@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
edsa_header[3] = 0x00;
- edsa_header[4] = 0x60 | p->dp->ds->index;
- edsa_header[5] = p->dp->index << 3;
+ edsa_header[4] = 0x60 | dp->ds->index;
+ edsa_header[5] = dp->index << 3;
/*
* Move CFI field from byte 6 to byte 5.
@@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
edsa_header[3] = 0x00;
- edsa_header[4] = 0x40 | p->dp->ds->index;
- edsa_header[5] = p->dp->index << 3;
+ edsa_header[4] = 0x40 | dp->ds->index;
+ edsa_header[5] = dp->index << 3;
edsa_header[6] = 0x00;
edsa_header[7] = 0x00;
}
@@ -104,7 +104,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = edsa_header[0] & 0x1f;
source_port = (edsa_header[1] >> 3) & 0x1f;
- skb->dev = dsa_master_get_slave(dev, source_device, source_port);
+ skb->dev = dsa_master_find_slave(dev, source_device, source_port);
if (!skb->dev)
return NULL;
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index b241c99..0f62eff 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -34,7 +34,7 @@
static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
u8 *tag;
@@ -72,7 +72,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
tag[0] = 0;
- tag[1] = 1 << p->dp->index; /* destination port */
+ tag[1] = 1 << dp->index; /* destination port */
return nskb;
}
@@ -87,7 +87,7 @@ static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = tag[0] & 7;
- skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
if (!skb->dev)
return NULL;
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 4f211e5..5ba01fc 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -11,6 +11,7 @@
* GNU General Public License for more details.
*
*/
+#include <linux/dsa/lan9303.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -39,10 +40,26 @@
*/
#define LAN9303_TAG_LEN 4
+# define LAN9303_TAG_TX_USE_ALR BIT(3)
+# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4)
+
+/* Decide whether to transmit using ALR lookup, or transmit directly to
+ * port using tag. ALR learning is performed only when using ALR lookup.
+ * If the two external ports are bridged and the frame is unicast,
+ * then use ALR lookup to allow ALR learning on CPU port.
+ * Otherwise transmit directly to port with STP state override.
+ * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1
+ */
+static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr)
+{
+ struct lan9303 *chip = dp->ds->priv;
+
+ return chip->is_bridged && !is_multicast_ether_addr(dest_addr);
+}
static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *lan9303_tag;
/* insert a special VLAN tag between the MAC addresses
@@ -62,13 +79,16 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
lan9303_tag[0] = htons(ETH_P_8021Q);
- lan9303_tag[1] = htons(p->dp->index | BIT(4));
+ lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ?
+ LAN9303_TAG_TX_USE_ALR :
+ dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
+ lan9303_tag[1] = htons(lan9303_tag[1]);
return skb;
}
static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+ struct packet_type *pt)
{
u16 *lan9303_tag;
unsigned int source_port;
@@ -94,7 +114,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = ntohs(lan9303_tag[1]) & 0x3;
- skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
if (!skb->dev) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
return NULL;
@@ -106,6 +126,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
skb_pull_rcsum(skb, 2 + 2);
memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
2 * ETH_ALEN);
+ skb->offload_fwd_mark = !ether_addr_equal(skb->data - ETH_HLEN,
+ eth_stp_addr);
return skb;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 968586c..8475434 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -23,7 +23,7 @@
static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *mtk_tag;
if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
@@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
/* Build the tag after the MAC Source Address */
mtk_tag = skb->data + 2 * ETH_ALEN;
mtk_tag[0] = 0;
- mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
mtk_tag[2] = 0;
mtk_tag[3] = 0;
@@ -69,7 +69,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
- skb->dev = dsa_master_get_slave(dev, 0, port);
+ skb->dev = dsa_master_find_slave(dev, 0, port);
if (!skb->dev)
return NULL;
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 8d33d9e..613f4ee 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -38,7 +38,7 @@
static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *phdr, hdr;
dev->stats.tx_packets++;
@@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
/* Set the version field, and set destination port information */
hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
- QCA_HDR_XMIT_FROM_CPU |
- BIT(p->dp->index);
+ QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
*phdr = htons(hdr);
@@ -92,7 +91,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Get source port information */
port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
- skb->dev = dsa_master_get_slave(dev, 0, port);
+ skb->dev = dsa_master_find_slave(dev, 0, port);
if (!skb->dev)
return NULL;
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 61668be..7d20e1f 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -16,7 +16,7 @@
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
u8 *trailer;
@@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
trailer = skb_put(nskb, 4);
trailer[0] = 0x80;
- trailer[1] = 1 << p->dp->index;
+ trailer[1] = 1 << dp->index;
trailer[2] = 0x10;
trailer[3] = 0x00;
@@ -71,7 +71,7 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = trailer[1] & 7;
- skb->dev = dsa_master_get_slave(dev, 0, source_port);
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
if (!skb->dev)
return NULL;
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 172d830..b8cd43c 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -328,12 +328,12 @@ out:
/* Announce (supervision frame) timer function
*/
-static void hsr_announce(unsigned long data)
+static void hsr_announce(struct timer_list *t)
{
struct hsr_priv *hsr;
struct hsr_port *master;
- hsr = (struct hsr_priv *) data;
+ hsr = from_timer(hsr, t, announce_timer);
rcu_read_lock();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
@@ -463,9 +463,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
hsr->sequence_nr = HSR_SEQNR_START;
hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
- setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr);
-
- setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr);
+ timer_setup(&hsr->announce_timer, hsr_announce, 0);
+ timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr);
hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec;
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 284a9b8..286ceb4 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -365,16 +365,14 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr,
/* Remove stale sequence_nr records. Called by timer every
* HSR_LIFE_CHECK_INTERVAL (two seconds or so).
*/
-void hsr_prune_nodes(unsigned long data)
+void hsr_prune_nodes(struct timer_list *t)
{
- struct hsr_priv *hsr;
+ struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
struct hsr_node *node;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
- hsr = (struct hsr_priv *) data;
-
rcu_read_lock();
list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
/* Shorthand */
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 4e04f0e..370b459 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -33,7 +33,7 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr);
-void hsr_prune_nodes(unsigned long data);
+void hsr_prune_nodes(struct timer_list *t);
int hsr_create_self_node(struct list_head *self_node_db,
unsigned char addr_a[ETH_ALEN],
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index ac7c96b..d8de3bc 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_6LOWPAN_I_H__
#define __IEEE802154_6LOWPAN_I_H__
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f85b08b..85bf86a 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
fq->daddr = *arg->dst;
}
-static void lowpan_frag_expire(unsigned long data)
+static void lowpan_frag_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
spin_lock(&fq->q.lock);
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 9b92ade..f05b7bd 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_IEEE802154) += ieee802154.o
obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
obj-y += 6lowpan/
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index 81141f5..1c19f57 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_CORE_H
#define __IEEE802154_CORE_H
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 6bde9e5..96636e3 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
return genlmsg_reply(msg, info);
}
-static const struct genl_ops ieee8021154_ops[] = {
+static const struct genl_ops ieee802154_ops[] = {
/* see nl-phy.c */
IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
ieee802154_dump_phy),
@@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = {
.version = 1,
.maxattr = IEEE802154_ATTR_MAX,
.module = THIS_MODULE,
- .ops = ieee8021154_ops,
- .n_ops = ARRAY_SIZE(ieee8021154_ops),
+ .ops = ieee802154_ops,
+ .n_ops = ARRAY_SIZE(ieee802154_ops),
.mcgrps = ieee802154_mcgrps,
.n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
};
diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h
index 3846a89..8c4b6d0 100644
--- a/net/ieee802154/nl802154.h
+++ b/net/ieee802154/nl802154.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_NL802154_H
#define __IEEE802154_NL802154_H
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 4441c63..598f5af 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CFG802154_RDEV_OPS
#define __CFG802154_RDEV_OPS
diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h
index aa42e39..337545b 100644
--- a/net/ieee802154/sysfs.h
+++ b/net/ieee802154/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __IEEE802154_SYSFS_H
#define __IEEE802154_SYSFS_H
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 9a471e4..19c2e5d 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Based on net/wireless/trace.h */
#undef TRACE_SYSTEM
diff --git a/net/ife/ife.c b/net/ife/ife.c
index f360341..7d1ec76 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -137,6 +137,6 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval)
EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
MODULE_AUTHOR("Jamal Hadi Salim <jhs@mojatatu.com>");
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
MODULE_DESCRIPTION("Inter-FE LFB action");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 91a2557..f48fe6f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES
address into account. Furthermore, the TOS (Type-Of-Service) field
of the packet can be used for routing decisions as well.
- If you are interested in this, please see the preliminary
- documentation at <http://www.compendium.com.ar/policy-routing.txt>
- and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>.
- You will need supporting software from
- <ftp://ftp.tux.org/pub/net/ip-routing/>.
+ If you need more information, see the Linux Advanced
+ Routing and Traffic Control documentation at
+ <http://lartc.org/howto/lartc.rpdb.html>
If unsure, say N.
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index afcb435..c6c8ad1 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux TCP/IP (INET) layer.
#
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 43a1bbe..ce4aa82 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -827,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how)
err = -ENOTCONN;
/* Hack to wake up other listeners, who can poll for
POLLHUP, even on eg. unconnected UDP sockets -- RR */
+ /* fall through */
default:
sk->sk_shutdown |= how;
if (sk->sk_prot->shutdown)
@@ -840,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how)
case TCP_LISTEN:
if (!(how & RCV_SHUTDOWN))
break;
- /* Fall through */
+ /* fall through */
case TCP_SYN_SENT:
err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7c45b88..a8d7c5a 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSARP:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
+ /* fall through */
case SIOCGARP:
err = copy_from_user(&r, arg, sizeof(struct arpreq));
if (err)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ae8f54..82178cc 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
buf = NULL;
req_inet = inet_rsk(req);
- opt = xchg(&req_inet->opt, opt);
+ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
if (opt)
kfree_rcu(opt, rcu);
@@ -1973,11 +1973,13 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
{
+ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
int hdr_delta = 0;
- struct ip_options_rcu *opt = *opt_ptr;
+ if (!opt || opt->opt.cipso == 0)
+ return 0;
if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
*/
void cipso_v4_sock_delattr(struct sock *sk)
{
- int hdr_delta;
- struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
+ int hdr_delta;
sk_inet = inet_sk(sk);
- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
- if (!opt || opt->opt.cipso == 0)
- return;
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options_rcu *opt;
- struct inet_request_sock *req_inet;
-
- req_inet = inet_rsk(req);
- opt = req_inet->opt;
- if (!opt || opt->opt.cipso == 0)
- return;
-
- cipso_v4_delopt(&req_inet->opt);
+ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
}
/**
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7ce22a2..a4573bc 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -444,7 +444,7 @@ static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
- u32 portid)
+ u32 portid, struct netlink_ext_ack *extack)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -489,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
*/
ivi.ivi_addr = ifa->ifa_address;
ivi.ivi_dev = ifa->ifa_dev;
+ ivi.extack = extack;
ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
NETDEV_UP, &ivi);
ret = notifier_to_errno(ret);
@@ -521,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
- return __inet_insert_ifa(ifa, NULL, 0);
+ return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
@@ -902,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return ret;
}
}
- return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+ return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
+ extack);
} else {
inet_free_ifa(ifa);
@@ -1522,6 +1524,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (inetdev_valid_mtu(dev->mtu))
break;
/* disable IP when MTU is not enough */
+ /* fall through */
case NETDEV_UNREGISTER:
inetdev_destroy(in_dev);
break;
@@ -1757,7 +1760,7 @@ static int inet_validate_link_af(const struct net_device *dev,
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int err, rem;
- if (dev && !__in_dev_get_rtnl(dev))
+ if (dev && !__in_dev_get_rcu(dev))
return -EAFNOSUPPORT;
err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
@@ -1781,7 +1784,7 @@ static int inet_validate_link_af(const struct net_device *dev,
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int rem;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f028191..f52d27a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -73,6 +73,11 @@ fail:
fib_free_table(main_table);
return -ENOMEM;
}
+
+static bool fib4_has_custom_rules(struct net *net)
+{
+ return false;
+}
#else
struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -128,6 +133,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
}
return NULL;
}
+
+static bool fib4_has_custom_rules(struct net *net)
+{
+ return net->ipv4.fib_has_custom_rules;
+}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
static void fib_replace_table(struct net *net, struct fib_table *old,
@@ -405,10 +415,12 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
if (IN_DEV_ACCEPT_LOCAL(idev))
goto ok;
- /* if no local routes are added from user space we can check
- * for local addresses looking-up the ifaddr table
+ /* with custom local routes in place, checking local addresses
+ * only will be too optimistic, with custom rules, checking
+ * local addresses only can be too strict, e.g. due to vrf
*/
- if (net->ipv4.fib_has_custom_local_routes)
+ if (net->ipv4.fib_has_custom_local_routes ||
+ fib4_has_custom_rules(net))
goto full_check;
if (inet_lookup_ifaddr_rcu(net, src))
return -EINVAL;
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 5b2af19..e6ff282 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FIB_LOOKUP_H
#define _FIB_LOOKUP_H
diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c
index cfd420b0..b804ccb 100644
--- a/net/ipv4/fib_notifier.c
+++ b/net/ipv4/fib_notifier.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <linux/socket.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 467b3c1..589caaa 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1356,8 +1356,6 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
goto nla_put_failure;
if (fi->fib_nhs == 1) {
- struct in_device *in_dev;
-
if (fi->fib_nh->nh_gw &&
nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
goto nla_put_failure;
@@ -1365,10 +1363,14 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
goto nla_put_failure;
if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev);
+ struct in_device *in_dev;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtm->rtm_flags |= RTNH_F_DEAD;
+ rcu_read_unlock();
}
if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD)
rtm->rtm_flags |= RTNH_F_OFFLOAD;
@@ -1391,18 +1393,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
for_nexthops(fi) {
- struct in_device *in_dev;
-
rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
if (!rtnh)
goto nla_put_failure;
rtnh->rtnh_flags = nh->nh_flags & 0xFF;
if (nh->nh_flags & RTNH_F_LINKDOWN) {
- in_dev = __in_dev_get_rtnl(nh->nh_dev);
+ struct in_device *in_dev;
+
+ rcu_read_lock();
+ in_dev = __in_dev_get_rcu(nh->nh_dev);
if (in_dev &&
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
rtnh->rtnh_flags |= RTNH_F_DEAD;
+ rcu_read_unlock();
}
rtnh->rtnh_hops = nh->nh_weight - 1;
rtnh->rtnh_ifindex = nh->nh_oif;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c636650..5ddc4aef 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -87,32 +87,32 @@
static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+ int dst_len, struct fib_alias *fa)
{
struct fib_entry_notifier_info info = {
.dst = dst,
.dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .tb_id = tb_id,
+ .fi = fa->fa_info,
+ .tos = fa->fa_tos,
+ .type = fa->fa_type,
+ .tb_id = fa->tb_id,
};
return call_fib4_notifier(nb, net, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+ int dst_len, struct fib_alias *fa,
+ struct netlink_ext_ack *extack)
{
struct fib_entry_notifier_info info = {
+ .info.extack = extack,
.dst = dst,
.dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .tb_id = tb_id,
+ .fi = fa->fa_info,
+ .tos = fa->fa_tos,
+ .type = fa->fa_type,
+ .tb_id = fa->tb_id,
};
return call_fib4_notifiers(net, event_type, &info.info);
}
@@ -1216,9 +1216,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
- key, plen, fi,
- new_fa->fa_tos, cfg->fc_type,
- tb->tb_id);
+ key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1273,8 +1271,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
tb->tb_num_default++;
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type,
- tb->tb_id);
+ call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
&cfg->fc_nlinfo, nlflags);
succeeded:
@@ -1574,8 +1571,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
return -ESRCH;
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
- fa_to_delete->fa_info, tos,
- fa_to_delete->fa_type, tb->tb_id);
+ fa_to_delete, extack);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1892,9 +1888,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
n->key,
- KEYLENGTH - fa->fa_slen,
- fi, fa->fa_tos, fa->fa_type,
- tb->tb_id);
+ KEYLENGTH - fa->fa_slen, fa,
+ NULL);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -1932,8 +1927,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
continue;
call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
- fa->fa_type, fa->tb_id);
+ KEYLENGTH - fa->fa_slen, fa);
}
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 416bb30..1859c47 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -86,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
greh = (struct gre_base_hdr *)skb_transport_header(skb);
pcsum = (__sum16 *)(greh + 1);
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
unsigned int partial_adj;
/* Adjust checksum to account for the fact that
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 681e339..1617604 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto)
}
/*
- * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
+ * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
* ICMP_PARAMETERPROB.
*/
@@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb)
if (iph->ihl < 5) /* Mangled header, drop. */
goto out_err;
- if (icmph->type == ICMP_DEST_UNREACH) {
+ switch (icmph->type) {
+ case ICMP_DEST_UNREACH:
switch (icmph->code & 15) {
case ICMP_NET_UNREACH:
case ICMP_HOST_UNREACH:
@@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb)
}
if (icmph->code > NR_ICMP_UNREACH)
goto out;
- } else if (icmph->type == ICMP_PARAMETERPROB)
+ break;
+ case ICMP_PARAMETERPROB:
info = ntohl(icmph->un.gateway) >> 24;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
+ if (icmph->code == ICMP_EXC_FRAGTIME)
+ goto out;
+ break;
+ }
/*
* Throw it at our lower layers
@@ -959,8 +968,9 @@ static bool icmp_timestamp(struct sk_buff *skb)
*/
icmp_param.data.times[1] = inet_current_timestamp();
icmp_param.data.times[2] = icmp_param.data.times[1];
- if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
- BUG();
+
+ BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4));
+
icmp_param.data.icmph = *icmp_hdr(skb);
icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
icmp_param.data.icmph.code = 0;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8a91ebb..4ca46dc 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
}
spin_unlock_bh(&queue->fastopenq.lock);
}
+ mem_cgroup_sk_alloc(newsk);
out:
release_sock(sk);
if (req)
@@ -494,17 +495,15 @@ EXPORT_SYMBOL(inet_csk_accept);
* to optimize.
*/
void inet_csk_init_xmit_timers(struct sock *sk,
- void (*retransmit_handler)(unsigned long),
- void (*delack_handler)(unsigned long),
- void (*keepalive_handler)(unsigned long))
+ void (*retransmit_handler)(struct timer_list *t),
+ void (*delack_handler)(struct timer_list *t),
+ void (*keepalive_handler)(struct timer_list *t))
{
struct inet_connection_sock *icsk = inet_csk(sk);
- setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
- (unsigned long)sk);
- setup_timer(&icsk->icsk_delack_timer, delack_handler,
- (unsigned long)sk);
- setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
+ timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
+ timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
+ timer_setup(&sk->sk_timer, keepalive_handler, 0);
icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);
@@ -539,9 +538,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = read_pnet(&ireq->ireq_net);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct rtable *rt;
+ opt = ireq_opt_deref(ireq);
+
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -575,10 +576,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct flowi4 *fl4;
struct rtable *rt;
+ opt = rcu_dereference(ireq->ireq_opt);
fl4 = &newinet->cork.fl.u.ip4;
- rcu_read_lock();
- opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -591,13 +591,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
- rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
- rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -676,9 +674,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
-static void reqsk_timer_handler(unsigned long data)
+static void reqsk_timer_handler(struct timer_list *t)
{
- struct request_sock *req = (struct request_sock *)data;
+ struct request_sock *req = from_timer(req, t, rsk_timer);
struct sock *sk_listener = req->rsk_listener;
struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
@@ -749,8 +747,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
req->num_timeout = 0;
req->sk = NULL;
- setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
- (unsigned long)req);
+ timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
inet_ehash_insert(req_to_sk(req), NULL);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index af74d04..7f3ef5c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
spin_unlock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire((unsigned long) fq);
+ f->frag_expire(&fq->timer);
return evicted;
}
@@ -366,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
f->constructor(q, arg);
add_frag_mem_limit(nf, f->qsize);
- setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+ timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
refcount_set(&q->refcnt, 1);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 597bb4c..e7d15fb0 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return reuseport_add_sock(sk, sk2);
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
int __inet_hash(struct sock *sk, struct sock *osk)
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 5b03915..a4bab81 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -142,9 +142,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-static void tw_timer_handler(unsigned long data)
+static void tw_timer_handler(struct timer_list *t)
{
- struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+ struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
if (tw->tw_kill)
__NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
@@ -188,8 +188,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
- setup_pinned_timer(&tw->tw_timer, tw_timer_handler,
- (unsigned long)tw);
+ timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 6e5626c..914d569 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -128,9 +128,9 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
break;
}
if (cmp == -1)
- pp = &(*pp)->rb_left;
+ pp = &next->rb_left;
else
- pp = &(*pp)->rb_right;
+ pp = &next->rb_right;
}
*parent_p = parent;
*pp_p = pp;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 9f0a7b9..2dd21c3 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 46408c2..bbf1b94 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -190,12 +191,13 @@ static bool frag_expire_skip_icmp(u32 user)
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
-static void ip_expire(unsigned long arg)
+static void ip_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct ipq *qp;
struct net *net;
- qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ qp = container_of(frag, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
rcu_read_lock();
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9cee986..c105a31 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -259,7 +259,6 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
struct ip_tunnel *tunnel;
struct erspanhdr *ershdr;
const struct iphdr *iph;
- __be32 session_id;
__be32 index;
int len;
@@ -275,8 +274,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
/* The original GRE header does not have key field,
* Use ERSPAN 10-bit session ID as key.
*/
- session_id = cpu_to_be32(ntohs(ershdr->session_id));
- tpi->key = session_id;
+ tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
index = ershdr->md.index;
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
@@ -581,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (gre_handle_offloads(skb, false))
goto err_free_rt;
- if (skb->len > dev->mtu) {
- pskb_trim(skb, dev->mtu);
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
truncate = true;
}
@@ -733,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
- if (skb->len > dev->mtu) {
- pskb_trim(skb, dev->mtu);
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
truncate = true;
}
@@ -1222,6 +1220,7 @@ static int gre_tap_init(struct net_device *dev)
{
__gre_tunnel_init(dev);
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
return ip_tunnel_init(dev);
}
@@ -1245,13 +1244,16 @@ static int erspan_tunnel_init(struct net_device *dev)
tunnel->tun_hlen = 8;
tunnel->parms.iph.protocol = IPPROTO_GRE;
- t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr);
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ sizeof(struct erspanhdr);
+ t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
dev->mtu = ETH_DATA_LEN - t_hlen - 4;
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
return ip_tunnel_init(dev);
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fa2dc8f..57fc13c 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -311,9 +311,10 @@ drop:
static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
- struct rtable *rt;
+ int (*edemux)(struct sk_buff *skb);
struct net_device *dev = skb->dev;
- void (*edemux)(struct sk_buff *skb);
+ struct rtable *rt;
+ int err;
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- edemux(skb);
+ err = edemux(skb);
+ if (unlikely(err))
+ goto drop_error;
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
@@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
* how the packet travels inside Linux networking.
*/
if (!skb_valid_dst(skb)) {
- int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, dev);
- if (unlikely(err)) {
- if (err == -EXDEV)
- __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
- goto drop;
- }
+ err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ iph->tos, dev);
+ if (unlikely(err))
+ goto drop_error;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
drop:
kfree_skb(skb);
return NET_RX_DROP;
+
+drop_error:
+ if (err == -EXDEV)
+ __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
+ goto drop;
}
/*
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 525ae88..ed194d4 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a599aa8..60fb1eb 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 02d70ca..949f432 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct ip_tunnel_parm *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
+ int pkt_len = skb->len;
int err;
int mtu;
@@ -197,15 +198,6 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tunnel->err_count > 0) {
- if (time_before(jiffies,
- tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
- tunnel->err_count--;
- dst_link_failure(skb);
- } else
- tunnel->err_count = 0;
- }
-
mtu = dst_mtu(dst);
if (skb->len > mtu) {
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
@@ -229,7 +221,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
err = dst_output(tunnel->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
- err = skb->len;
+ err = pkt_len;
iptunnel_xmit_stats(dev, err);
return NETDEV_TX_OK;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4c5dfe6..abdebca 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
* user-supplied information to configure own IP address and routes.
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 1e47818..c891235 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
static int ipip_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ */
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
- struct ip_tunnel *t;
- int err;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ struct ip_tunnel *t;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ /* Impossible event. */
+ goto out;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ * rfc2003 contains "deep thoughts" about NET_UNREACH,
+ * I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ goto out;
+ break;
+
+ case ICMP_REDIRECT:
+ break;
+
+ default:
+ goto out;
+ }
- err = -ENOENT;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
- if (!t)
+ if (!t) {
+ err = -ENOENT;
goto out;
+ }
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
- err = 0;
+ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+ iph->protocol, 0);
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
- err = 0;
+ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
goto out;
}
- if (t->parms.iph.daddr == 0)
+ if (t->parms.iph.daddr == 0) {
+ err = -ENOENT;
goto out;
+ }
- err = 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b3ee01b..40a43ad 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1528,6 +1528,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
case MRT_ADD_MFC:
case MRT_DEL_MFC:
parent = -1;
+ /* fall through */
case MRT_ADD_MFC_PROXY:
case MRT_DEL_MFC_PROXY:
if (optlen != sizeof(mfc)) {
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index f462fee..adcdae3 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the netfilter modules on top of IPv4.
#
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 811689e..f75fc6b 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv,
if (synproxy == NULL)
return NF_ACCEPT;
- if (nf_is_loopback_packet(skb))
+ if (nf_is_loopback_packet(skb) ||
+ ip_hdr(skb)->protocol != IPPROTO_TCP)
return NF_ACCEPT;
thoff = ip_hdrlen(skb);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index a0f37b2..0443ca4 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
else
return NF_ACCEPT;
}
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ /* Only ICMPs can be IP_CT_IS_REPLY: */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index eeacbda..5cd06ba 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -132,6 +132,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
goto free_nskb;
+ niph = ip_hdr(nskb);
+
/* "Never happens" */
if (nskb->len > dst_mtu(skb_dst(nskb)))
goto free_nskb;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94d4cd2..bc40bd4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1250,7 +1250,7 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
- unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+ unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
ip_rt_min_advmss);
return min(advmss, IPV4_MAX_PMTU - header_size);
@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
EXPORT_SYMBOL(rt_dst_alloc);
/* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, int our)
+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag)
{
- struct rtable *rth;
- struct in_device *in_dev = __in_dev_get_rcu(dev);
- unsigned int flags = RTCF_MULTICAST;
- u32 itag = 0;
int err;
/* Primary sanity checks. */
-
if (!in_dev)
return -EINVAL;
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
skb->protocol != htons(ETH_P_IP))
- goto e_inval;
+ return -EINVAL;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
- goto e_inval;
+ return -EINVAL;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr))
- goto e_inval;
+ return -EINVAL;
} else {
err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, &itag);
+ in_dev, itag);
if (err < 0)
- goto e_err;
+ return err;
}
+ return 0;
+}
+
+/* called in rcu_read_lock() section */
+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, int our)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+ unsigned int flags = RTCF_MULTICAST;
+ struct rtable *rth;
+ u32 itag = 0;
+ int err;
+
+ err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
+ if (err)
+ return err;
+
if (our)
flags |= RTCF_LOCAL;
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
- goto e_nobufs;
+ return -ENOBUFS;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_set(skb, &rth->dst);
return 0;
-
-e_nobufs:
- return -ENOBUFS;
-e_inval:
- return -EINVAL;
-e_err:
- return err;
}
@@ -2507,7 +2513,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
struct rtable *ort = (struct rtable *) dst_orig;
struct rtable *rt;
- rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
+ rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
if (rt) {
struct dst_entry *new = &rt->dst;
@@ -3032,7 +3038,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
int __init ip_rt_init(void)
{
- int rc = 0;
int cpu;
ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
@@ -3089,7 +3094,7 @@ int __init ip_rt_init(void)
#endif
register_pernet_subsys(&rt_genid_ops);
register_pernet_subsys(&ipv4_inetpeer_ops);
- return rc;
+ return 0;
}
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b1bb1b3..fda37f2 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- ireq->opt = tcp_v4_save_options(sock_net(sk), skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
@@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index cac8dd3..a82b440 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
@@ -25,6 +26,7 @@
#include <net/inet_frag.h>
#include <net/ping.h>
#include <net/protocol.h>
+#include <net/netevent.h>
static int zero;
static int one = 1;
@@ -284,7 +286,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
ret = -EINVAL;
goto bad_key;
}
- tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH);
+ tcp_fastopen_reset_cipher(net, NULL, user_key,
+ TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
@@ -384,15 +387,25 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
return ret;
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_fib_multipath_hash_policy);
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net);
+
+ return ret;
+}
+#endif
+
static struct ctl_table ipv4_table[] = {
{
- .procname = "tcp_retrans_collapse",
- .data = &sysctl_tcp_retrans_collapse,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_max_orphans",
.data = &sysctl_tcp_max_orphans,
.maxlen = sizeof(int),
@@ -400,27 +413,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_abort_on_overflow",
- .data = &sysctl_tcp_abort_on_overflow,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_stdurg",
- .data = &sysctl_tcp_stdurg,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_rfc1337",
- .data = &sysctl_tcp_rfc1337,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "inet_peer_threshold",
.data = &inet_peer_threshold,
.maxlen = sizeof(int),
@@ -442,34 +434,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
- .procname = "tcp_fack",
- .data = &sysctl_tcp_fack,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_recovery",
- .data = &sysctl_tcp_recovery,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_max_reordering",
- .data = &sysctl_tcp_max_reordering,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_dsack",
- .data = &sysctl_tcp_dsack,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_mem",
.maxlen = sizeof(sysctl_tcp_mem),
.data = &sysctl_tcp_mem,
@@ -493,36 +457,6 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &one,
},
{
- .procname = "tcp_app_win",
- .data = &sysctl_tcp_app_win,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_adv_win_scale",
- .data = &sysctl_tcp_adv_win_scale,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &tcp_adv_win_scale_min,
- .extra2 = &tcp_adv_win_scale_max,
- },
- {
- .procname = "tcp_frto",
- .data = &sysctl_tcp_frto,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_min_rtt_wlen",
- .data = &sysctl_tcp_min_rtt_wlen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency,
.maxlen = sizeof(int),
@@ -530,60 +464,11 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_no_metrics_save",
- .data = &sysctl_tcp_nometrics_save,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_moderate_rcvbuf",
- .data = &sysctl_tcp_moderate_rcvbuf,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_tso_win_divisor",
- .data = &sysctl_tcp_tso_win_divisor,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "tcp_congestion_control",
.mode = 0644,
.maxlen = TCP_CA_NAME_MAX,
.proc_handler = proc_tcp_congestion_control,
},
- {
- .procname = "tcp_workaround_signed_windows",
- .data = &sysctl_tcp_workaround_signed_windows,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_limit_output_bytes",
- .data = &sysctl_tcp_limit_output_bytes,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_challenge_ack_limit",
- .data = &sysctl_tcp_challenge_ack_limit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_slow_start_after_idle",
- .data = &sysctl_tcp_slow_start_after_idle,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
#ifdef CONFIG_NETLABEL
{
.procname = "cipso_cache_enable",
@@ -627,65 +512,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_allowed_congestion_control,
},
{
- .procname = "tcp_thin_linear_timeouts",
- .data = &sysctl_tcp_thin_linear_timeouts,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
- .procname = "tcp_early_retrans",
- .data = &sysctl_tcp_early_retrans,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &four,
- },
- {
- .procname = "tcp_min_tso_segs",
- .data = &sysctl_tcp_min_tso_segs,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &gso_max_segs,
- },
- {
- .procname = "tcp_pacing_ss_ratio",
- .data = &sysctl_tcp_pacing_ss_ratio,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &thousand,
- },
- {
- .procname = "tcp_pacing_ca_ratio",
- .data = &sysctl_tcp_pacing_ca_ratio,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &thousand,
- },
- {
- .procname = "tcp_autocorking",
- .data = &sysctl_tcp_autocorking,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
- },
- {
- .procname = "tcp_invalid_ratelimit",
- .data = &sysctl_tcp_invalid_ratelimit,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- },
- {
.procname = "tcp_available_ulp",
.maxlen = TCP_ULP_BUF_MAX,
.mode = 0444,
@@ -1100,7 +926,7 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_fib_multipath_hash_policy,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_fib_multipath_hash_policy,
.extra1 = &zero,
.extra2 = &one,
},
@@ -1144,6 +970,200 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_early_retrans",
+ .data = &init_net.ipv4.sysctl_tcp_early_retrans,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &four,
+ },
+ {
+ .procname = "tcp_recovery",
+ .data = &init_net.ipv4.sysctl_tcp_recovery,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_thin_linear_timeouts",
+ .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_slow_start_after_idle",
+ .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_retrans_collapse",
+ .data = &init_net.ipv4.sysctl_tcp_retrans_collapse,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_stdurg",
+ .data = &init_net.ipv4.sysctl_tcp_stdurg,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_rfc1337",
+ .data = &init_net.ipv4.sysctl_tcp_rfc1337,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_abort_on_overflow",
+ .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_fack",
+ .data = &init_net.ipv4.sysctl_tcp_fack,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_max_reordering",
+ .data = &init_net.ipv4.sysctl_tcp_max_reordering,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_dsack",
+ .data = &init_net.ipv4.sysctl_tcp_dsack,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_app_win",
+ .data = &init_net.ipv4.sysctl_tcp_app_win,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_adv_win_scale",
+ .data = &init_net.ipv4.sysctl_tcp_adv_win_scale,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_adv_win_scale_min,
+ .extra2 = &tcp_adv_win_scale_max,
+ },
+ {
+ .procname = "tcp_frto",
+ .data = &init_net.ipv4.sysctl_tcp_frto,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_no_metrics_save",
+ .data = &init_net.ipv4.sysctl_tcp_nometrics_save,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_moderate_rcvbuf",
+ .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_tso_win_divisor",
+ .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_workaround_signed_windows",
+ .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_limit_output_bytes",
+ .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_challenge_ack_limit",
+ .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_min_tso_segs",
+ .data = &init_net.ipv4.sysctl_tcp_min_tso_segs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &gso_max_segs,
+ },
+ {
+ .procname = "tcp_min_rtt_wlen",
+ .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_autocorking",
+ .data = &init_net.ipv4.sysctl_tcp_autocorking,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .procname = "tcp_invalid_ratelimit",
+ .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
+ .procname = "tcp_pacing_ss_ratio",
+ .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &thousand,
+ },
+ {
+ .procname = "tcp_pacing_ca_ratio",
+ .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &thousand,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 23225c9..c4cb19e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -270,6 +270,7 @@
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/errqueue.h>
+#include <linux/static_key.h>
#include <net/icmp.h>
#include <net/inet_common.h>
@@ -282,9 +283,7 @@
#include <asm/ioctls.h>
#include <net/busy_poll.h>
-int sysctl_tcp_min_tso_segs __read_mostly = 2;
-
-int sysctl_tcp_autocorking __read_mostly = 1;
+#include <trace/events/tcp.h>
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -300,6 +299,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
+#if IS_ENABLED(CONFIG_SMC)
+DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
+EXPORT_SYMBOL(tcp_have_smc);
+#endif
+
/*
* Current number of TCP sockets.
*/
@@ -413,8 +417,10 @@ void tcp_init_sock(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
tp->out_of_order_queue = RB_ROOT;
+ sk->tcp_rtx_queue = RB_ROOT;
tcp_init_xmit_timers(sk);
INIT_LIST_HEAD(&tp->tsq_node);
+ INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -441,6 +447,7 @@ void tcp_init_sock(struct sock *sk)
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
+ tp->rack.reo_wnd_steps = 1;
sk->sk_state = TCP_CLOSE;
@@ -456,8 +463,22 @@ void tcp_init_sock(struct sock *sk)
}
EXPORT_SYMBOL(tcp_init_sock);
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
+void tcp_init_transfer(struct sock *sk, int bpf_op)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_mtup_init(sk);
+ icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_init_metrics(sk);
+ tcp_call_bpf(sk, bpf_op);
+ tcp_init_congestion_control(sk);
+ tcp_init_buffer_space(sk);
+}
+
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
{
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
+
if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -675,7 +696,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal)
{
return skb->len < size_goal &&
- sysctl_tcp_autocorking &&
+ sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
skb != tcp_write_queue_head(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
@@ -686,10 +707,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- if (!tcp_send_head(sk))
- return;
-
skb = tcp_write_queue_tail(sk);
+ if (!skb)
+ return;
if (!(flags & MSG_MORE) || forced_push(tp))
tcp_mark_push(tp, skb);
@@ -869,6 +889,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* available to the caller, no more, no less.
*/
skb->reserved_tailroom = skb->end - skb->tail - size;
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
return skb;
}
__kfree_skb(skb);
@@ -948,14 +969,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
int copy, i;
bool can_coalesce;
- if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+ if (!skb || (copy = size_goal - skb->len) <= 0 ||
!tcp_skb_can_collapse_to(skb)) {
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
- skb_queue_empty(&sk->sk_write_queue));
+ tcp_rtx_and_write_queues_empty(sk));
if (!skb)
goto wait_for_memory;
@@ -1027,7 +1048,7 @@ wait_for_memory:
out:
if (copied) {
- tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+ tcp_tx_timestamp(sk, sk->sk_tsflags);
if (!(flags & MSG_SENDPAGE_NOTLAST))
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
@@ -1183,7 +1204,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
+ skb = tcp_write_queue_tail(sk);
uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
if (!uarg) {
err = -ENOBUFS;
@@ -1259,7 +1280,7 @@ restart:
int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (tcp_send_head(sk)) {
+ if (skb) {
if (skb->ip_summed == CHECKSUM_NONE)
max = mss_now;
copy = max - skb->len;
@@ -1279,7 +1300,7 @@ new_segment:
process_backlog = false;
goto restart;
}
- first_skb = skb_queue_empty(&sk->sk_write_queue);
+ first_skb = tcp_rtx_and_write_queues_empty(sk);
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg, first_skb),
sk->sk_allocation,
@@ -1404,7 +1425,7 @@ wait_for_memory:
out:
if (copied) {
- tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
+ tcp_tx_timestamp(sk, sockc.tsflags);
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
@@ -1505,6 +1526,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
/* XXX -- need to support SO_PEEK_OFF */
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
+ err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
+ if (err)
+ return err;
+ copied += skb->len;
+ }
+
skb_queue_walk(&sk->sk_write_queue, skb) {
err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
if (err)
@@ -2017,6 +2045,8 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
+ trace_tcp_set_state(sk, oldstate, state);
+
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
@@ -2304,6 +2334,37 @@ static inline bool tcp_need_reset(int state)
TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}
+static void tcp_rtx_queue_purge(struct sock *sk)
+{
+ struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+
+ while (p) {
+ struct sk_buff *skb = rb_to_skb(p);
+
+ p = rb_next(p);
+ /* Since we are deleting whole queue, no need to
+ * list_del(&skb->tcp_tsorted_anchor)
+ */
+ tcp_rtx_queue_unlink(skb, sk);
+ sk_wmem_free_skb(sk, skb);
+ }
+}
+
+void tcp_write_queue_purge(struct sock *sk)
+{
+ struct sk_buff *skb;
+
+ tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+ while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+ tcp_skb_tsorted_anchor_cleanup(skb);
+ sk_wmem_free_skb(sk, skb);
+ }
+ tcp_rtx_queue_purge(sk);
+ INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
+ sk_mem_reclaim(sk);
+ tcp_clear_all_retrans_hints(tcp_sk(sk));
+}
+
int tcp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -2362,7 +2423,6 @@ int tcp_disconnect(struct sock *sk, int flags)
* issue in __tcp_select_window()
*/
icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
- tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
dst_release(sk->sk_rx_dst);
@@ -2454,7 +2514,7 @@ static int tcp_repair_options_est(struct sock *sk,
return -EINVAL;
tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
- if (sysctl_tcp_fack)
+ if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp);
break;
case TCPOPT_TIMESTAMP:
@@ -2518,6 +2578,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
release_sock(sk);
return err;
}
+ case TCP_FASTOPEN_KEY: {
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+ if (optlen != sizeof(key))
+ return -EINVAL;
+
+ if (copy_from_user(key, optval, optlen))
+ return -EFAULT;
+
+ return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+ }
default:
/* fallthru */
break;
@@ -2768,6 +2839,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = -EOPNOTSUPP;
}
break;
+ case TCP_FASTOPEN_NO_COOKIE:
+ if (val > 1 || val < 0)
+ err = -EINVAL;
+ else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ err = -EINVAL;
+ else
+ tp->fastopen_no_cookie = val;
+ break;
case TCP_TIMESTAMP:
if (!tp->repair)
err = -EPERM;
@@ -3104,6 +3183,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
return 0;
+ case TCP_FASTOPEN_KEY: {
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctx;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
+ if (ctx)
+ memcpy(key, ctx->key, sizeof(key));
+ else
+ len = 0;
+ rcu_read_unlock();
+
+ len = min_t(unsigned int, len, sizeof(key));
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, key, len))
+ return -EFAULT;
+ return 0;
+ }
case TCP_THIN_LINEAR_TIMEOUTS:
val = tp->thin_lto;
break;
@@ -3166,6 +3267,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = tp->fastopen_connect;
break;
+ case TCP_FASTOPEN_NO_COOKIE:
+ val = tp->fastopen_no_cookie;
+ break;
+
case TCP_TIMESTAMP:
val = tcp_time_stamp_raw() + tp->tsoffset;
break;
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 66ac69f..06fbe10 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -389,7 +389,7 @@ static void tcp_cdg_release(struct sock *sk)
kfree(ca->gradients);
}
-struct tcp_congestion_ops tcp_cdg __read_mostly = {
+static struct tcp_congestion_ops tcp_cdg __read_mostly = {
.cong_avoid = tcp_cdg_cong_avoid,
.cwnd_event = tcp_cdg_cwnd_event,
.pkts_acked = tcp_cdg_acked,
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index de470e7..78c192e 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/init.h>
@@ -29,7 +30,7 @@ void tcp_fastopen_init_key_once(struct net *net)
* for a valid cookie, so this is an acceptable risk.
*/
get_random_bytes(key, sizeof(key));
- tcp_fastopen_reset_cipher(net, key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -40,6 +41,16 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
kfree(ctx);
}
+void tcp_fastopen_destroy_cipher(struct sock *sk)
+{
+ struct tcp_fastopen_context *ctx;
+
+ ctx = rcu_dereference_protected(
+ inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
+ if (ctx)
+ call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
+}
+
void tcp_fastopen_ctx_destroy(struct net *net)
{
struct tcp_fastopen_context *ctxt;
@@ -55,10 +66,12 @@ void tcp_fastopen_ctx_destroy(struct net *net)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
}
-int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len)
+int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
+ void *key, unsigned int len)
{
- int err;
struct tcp_fastopen_context *ctx, *octx;
+ struct fastopen_queue *q;
+ int err;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -79,11 +92,18 @@ error: kfree(ctx);
}
memcpy(ctx->key, key, len);
- spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
- octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+ if (sk) {
+ q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+ octx = rcu_dereference_protected(q->ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(q->ctx, ctx);
+ } else {
+ octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ }
spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
if (octx)
@@ -91,15 +111,18 @@ error: kfree(ctx);
return err;
}
-static bool __tcp_fastopen_cookie_gen(struct net *net,
- const void *path,
+static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
struct tcp_fastopen_cookie *foc)
{
struct tcp_fastopen_context *ctx;
bool ok = false;
rcu_read_lock();
- ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+
+ ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+ if (!ctx)
+ ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+
if (ctx) {
crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -115,7 +138,7 @@ static bool __tcp_fastopen_cookie_gen(struct net *net,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct net *net,
+static bool tcp_fastopen_cookie_gen(struct sock *sk,
struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
@@ -124,7 +147,7 @@ static bool tcp_fastopen_cookie_gen(struct net *net,
const struct iphdr *iph = ip_hdr(syn);
__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(net, path, foc);
+ return __tcp_fastopen_cookie_gen(sk, path, foc);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -132,13 +155,13 @@ static bool tcp_fastopen_cookie_gen(struct net *net,
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;
- if (__tcp_fastopen_cookie_gen(net, &ip6h->saddr, &tmp)) {
+ if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
struct in6_addr *buf = &tmp.addr;
int i;
for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(net, buf, foc);
+ return __tcp_fastopen_cookie_gen(sk, buf, foc);
}
}
#endif
@@ -236,12 +259,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
/* Now finish processing the fastopen child socket. */
- inet_csk(child)->icsk_af_ops->rebuild_header(child);
- tcp_init_congestion_control(child);
- tcp_mtup_init(child);
- tcp_init_metrics(child);
- tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_buffer_space(child);
+ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
@@ -291,13 +309,23 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
return true;
}
+static bool tcp_fastopen_no_cookie(const struct sock *sk,
+ const struct dst_entry *dst,
+ int flag)
+{
+ return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ tcp_sk(sk)->fastopen_no_cookie ||
+ (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
+}
+
/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
* may be updated and return the client in the SYN-ACK later. E.g., Fast Open
* cookie request (foc->len == 0).
*/
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc,
+ const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
@@ -314,11 +342,12 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
return NULL;
}
- if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+ if (syn_data &&
+ tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(sock_net(sk), req, skb, &valid_foc) &&
+ tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
foc->len == valid_foc.len &&
!memcmp(foc->val, valid_foc.val, foc->len)) {
@@ -351,6 +380,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
unsigned long last_syn_loss = 0;
+ const struct dst_entry *dst;
int syn_loss = 0;
tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
@@ -368,7 +398,9 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
return false;
}
- if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+ dst = __sk_dst_get(sk);
+
+ if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) {
cookie->len = -1;
return true;
}
@@ -470,17 +502,15 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node *p;
- struct sk_buff *skb;
struct dst_entry *dst;
+ struct sk_buff *skb;
if (!tp->syn_fastopen)
return;
if (!tp->data_segs_in) {
- p = rb_first(&tp->out_of_order_queue);
- if (p && !rb_next(p)) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
+ if (skb && !skb_rb_next(skb)) {
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
tcp_fastopen_active_disable(sk);
return;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index db9bb46..0ada8bf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
@@ -75,25 +76,10 @@
#include <linux/ipsec.h>
#include <asm/unaligned.h>
#include <linux/errqueue.h>
+#include <trace/events/tcp.h>
+#include <linux/static_key.h>
-int sysctl_tcp_fack __read_mostly;
-int sysctl_tcp_max_reordering __read_mostly = 300;
-int sysctl_tcp_dsack __read_mostly = 1;
-int sysctl_tcp_app_win __read_mostly = 31;
-int sysctl_tcp_adv_win_scale __read_mostly = 1;
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-
-/* rfc5961 challenge ack rate limiting */
-int sysctl_tcp_challenge_ack_limit = 1000;
-
-int sysctl_tcp_stdurg __read_mostly;
-int sysctl_tcp_rfc1337 __read_mostly;
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
-int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_min_rtt_wlen __read_mostly = 300;
-int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 3;
-int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -367,8 +353,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(skb->truesize) >> 1;
- int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
+ int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int window = tcp_win_from_space(sk, sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
@@ -393,7 +379,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
incr = 2 * tp->advmss;
else
incr = __tcp_grow_window(sk, skb);
@@ -419,7 +405,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
/* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
* Allow enough cushion so that sender is not limited by our window
*/
- if (sysctl_tcp_moderate_rcvbuf)
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf)
rcvmem <<= 2;
if (sk->sk_rcvbuf < rcvmem)
@@ -431,6 +417,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
*/
void tcp_init_buffer_space(struct sock *sk)
{
+ int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
@@ -449,14 +436,14 @@ void tcp_init_buffer_space(struct sock *sk)
if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin;
- if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
+ if (tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin -
- (maxwin >> sysctl_tcp_app_win),
+ (maxwin >> tcp_app_win),
4 * tp->advmss);
}
/* Force reservation of one segment. */
- if (sysctl_tcp_app_win &&
+ if (tcp_app_win &&
tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
@@ -609,7 +596,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sysctl_tcp_moderate_rcvbuf &&
+ if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvwin, rcvmem, rcvbuf;
@@ -633,7 +620,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
}
rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
- while (tcp_win_from_space(rcvmem) < tp->advmss)
+ while (tcp_win_from_space(sk, rcvmem) < tp->advmss)
rcvmem += 128;
rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
@@ -780,15 +767,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->srtt_us = max(1U, srtt);
}
-/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
- * Note: TCP stack does not yet implement pacing.
- * FQ packet scheduler can be used to implement cheap but effective
- * TCP pacing, to smooth the burst on large writes when packets
- * in flight is significantly lower than cwnd (or rwin)
- */
-int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
-int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
-
static void tcp_update_pacing_rate(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -806,9 +784,9 @@ static void tcp_update_pacing_rate(struct sock *sk)
* end of slow start and should slow down.
*/
if (tp->snd_cwnd < tp->snd_ssthresh / 2)
- rate *= sysctl_tcp_pacing_ss_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
else
- rate *= sysctl_tcp_pacing_ca_ratio;
+ rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
rate *= max(tp->snd_cwnd, tp->packets_out);
@@ -878,6 +856,7 @@ void tcp_disable_fack(struct tcp_sock *tp)
static void tcp_dsack_seen(struct tcp_sock *tp)
{
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
+ tp->rack.dsack_seen = 1;
}
static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -890,7 +869,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
return;
if (metric > tp->reordering) {
- tp->reordering = min(sysctl_tcp_max_reordering, metric);
+ tp->reordering = min(sock_net(sk)->ipv4.sysctl_tcp_max_reordering, metric);
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
@@ -1142,6 +1121,7 @@ struct tcp_sacktag_state {
u64 last_sackt;
struct rate_sample *rate;
int flag;
+ unsigned int mss_now;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1191,7 +1171,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
if (pkt_len >= skb->len && !in_sack)
return 0;
- err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+ err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
}
@@ -1288,13 +1269,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
/* Shift newly-SACKed bytes from this skb to the immediately previous
* already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
*/
-static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+ struct sk_buff *skb,
struct tcp_sacktag_state *state,
unsigned int pcount, int shifted, int mss,
bool dup_sack)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
@@ -1363,8 +1344,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
@@ -1414,9 +1394,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
goto fallback;
/* Can only happen with delayed DSACK + discard craziness */
- if (unlikely(skb == tcp_write_queue_head(sk)))
+ prev = skb_rb_prev(skb);
+ if (!prev)
goto fallback;
- prev = tcp_write_queue_prev(sk, skb);
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
@@ -1495,18 +1475,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if (!skb_shift(prev, skb, len))
goto fallback;
- if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+ if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out;
/* Hole filled allows collapsing with the next as well, this is very
* useful when hole on every nth skb pattern happens
*/
- if (prev == tcp_write_queue_tail(sk))
+ skb = skb_rb_next(prev);
+ if (!skb)
goto out;
- skb = tcp_write_queue_next(sk, prev);
if (!skb_can_shift(skb) ||
- (skb == tcp_send_head(sk)) ||
((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
(mss != tcp_skb_seglen(skb)))
goto out;
@@ -1514,7 +1493,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
len = skb->len;
if (skb_shift(prev, skb, len)) {
pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+ tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+ len, mss, 0);
}
out:
@@ -1538,13 +1518,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *tmp;
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
int in_sack = 0;
bool dup_sack = dup_sack_in;
- if (skb == tcp_send_head(sk))
- break;
-
/* queue is in-order => we can short-circuit the walk early */
if (!before(TCP_SKB_CB(skb)->seq, end_seq))
break;
@@ -1593,6 +1570,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
tcp_skb_pcount(skb),
skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ list_del_init(&skb->tcp_tsorted_anchor);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
@@ -1604,23 +1583,44 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
return skb;
}
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+ struct tcp_sacktag_state *state,
+ u32 seq)
+{
+ struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+ struct sk_buff *skb;
+ int unack_bytes;
+
+ while (*p) {
+ parent = *p;
+ skb = rb_to_skb(parent);
+ if (before(seq, TCP_SKB_CB(skb)->seq)) {
+ p = &parent->rb_left;
+ continue;
+ }
+ if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+ p = &parent->rb_right;
+ continue;
+ }
+
+ state->fack_count = 0;
+ unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una;
+ if (state->mss_now && unack_bytes > 0)
+ state->fack_count = unack_bytes / state->mss_now;
+
+ return skb;
+ }
+ return NULL;
+}
+
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
struct tcp_sacktag_state *state,
u32 skip_to_seq)
{
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
- if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
- break;
+ if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+ return skb;
- state->fack_count += tcp_skb_pcount(skb);
- }
- return skb;
+ return tcp_sacktag_bsearch(sk, state, skip_to_seq);
}
static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1742,8 +1742,9 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
}
- skb = tcp_write_queue_head(sk);
+ state->mss_now = tcp_current_mss(sk);
state->fack_count = 0;
+ skb = NULL;
i = 0;
if (!tp->sacked_out) {
@@ -1967,7 +1968,7 @@ void tcp_enter_loss(struct sock *sk)
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
@@ -1976,10 +1977,7 @@ void tcp_enter_loss(struct sock *sk)
}
tcp_clear_all_retrans_hints(tp);
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
+ skb_rbtree_walk_from(skb) {
mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
is_reneg);
if (mark_lost)
@@ -2013,7 +2011,7 @@ void tcp_enter_loss(struct sock *sk)
* falsely raise the receive window, which results in repeated
* timeouts and stop-and-go behavior.
*/
- tp->frto = sysctl_tcp_frto &&
+ tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -2205,20 +2203,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
WARN_ON(packets > tp->packets_out);
- if (tp->lost_skb_hint) {
- skb = tp->lost_skb_hint;
- cnt = tp->lost_cnt_hint;
+ skb = tp->lost_skb_hint;
+ if (skb) {
/* Head already handled? */
- if (mark_head && skb != tcp_write_queue_head(sk))
+ if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
return;
+ cnt = tp->lost_cnt_hint;
} else {
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
cnt = 0;
}
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk_from(skb) {
/* TODO: do this better */
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
@@ -2242,7 +2238,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
/* If needed, chop off the prefix to mark as lost. */
lost = (packets - oldcnt) * mss;
if (lost < skb->len &&
- tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+ tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ lost, mss, GFP_ATOMIC) < 0)
break;
cnt = packets;
}
@@ -2326,7 +2323,7 @@ static bool tcp_any_retrans_done(const struct sock *sk)
if (tp->retrans_out)
return true;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
return true;
@@ -2367,9 +2364,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (unmark_loss) {
struct sk_buff *skb;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
tp->lost_out = 0;
@@ -2414,6 +2409,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
mib_idx = LINUX_MIB_TCPFULLUNDO;
NET_INC_STATS(sock_net(sk), mib_idx);
+ } else if (tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_persist--;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
@@ -2433,6 +2430,8 @@ static bool tcp_try_undo_dsack(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->undo_marker && !tp->undo_retrans) {
+ tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH,
+ tp->rack.reo_wnd_persist + 1);
DBGUNDO(sk, "D-SACK");
tcp_undo_cwnd_reduction(sk, false);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
@@ -2614,9 +2613,7 @@ void tcp_simple_retransmit(struct sock *sk)
unsigned int mss = tcp_current_mss(sk);
u32 prior_lost = tp->lost_out;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
if (tcp_skb_seglen(skb) > mss &&
!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2710,7 +2707,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
* is updated in tcp_ack()). Otherwise fall back to
* the conventional recovery.
*/
- if (tcp_send_head(sk) &&
+ if (!tcp_write_queue_empty(sk) &&
after(tcp_wnd_end(tp), tp->snd_nxt)) {
*rexmit = REXMIT_NEW;
return;
@@ -2772,7 +2769,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag)
struct tcp_sock *tp = tcp_sk(sk);
/* Use RACK to detect loss */
- if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) {
u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk);
@@ -2802,9 +2799,9 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- if (WARN_ON(!tp->packets_out && tp->sacked_out))
+ if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (WARN_ON(!tp->sacked_out && tp->fackets_out))
+ if (!tp->sacked_out && tp->fackets_out)
tp->fackets_out = 0;
/* Now state machine starts.
@@ -2871,6 +2868,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
(*ack_flag & FLAG_LOST_RETRANS)))
return;
/* Change state if cwnd is undone or retransmits are lost */
+ /* fall through */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2911,8 +2909,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
{
+ u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
struct tcp_sock *tp = tcp_sk(sk);
- u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32,
rtt_us ? : jiffies_to_usecs(1));
@@ -3054,8 +3052,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
shinfo = skb_shinfo(skb);
if (!before(shinfo->tskey, prior_snd_una) &&
- before(shinfo->tskey, tcp_sk(sk)->snd_una))
- __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+ tcp_skb_tsorted_save(skb) {
+ __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ } tcp_skb_tsorted_restore(skb);
+ }
}
/* Remove acknowledged frames from the retransmission queue. If our packet
@@ -3071,11 +3072,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
+ struct sk_buff *skb, *next;
bool fully_acked = true;
long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
- struct sk_buff *skb;
u32 pkts_acked = 0;
u32 last_in_flight = 0;
bool rtt_update;
@@ -3083,7 +3084,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt = 0;
- while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
u8 sacked = scb->sacked;
u32 acked_pcount;
@@ -3101,8 +3102,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
break;
fully_acked = false;
} else {
- /* Speedup tcp_unlink_write_queue() and next loop */
- prefetchw(skb->next);
acked_pcount = tcp_skb_pcount(skb);
}
@@ -3154,12 +3153,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (!fully_acked)
break;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
tp->lost_skb_hint = NULL;
+ tcp_rtx_queue_unlink_and_free(skb, sk);
}
if (!skb)
@@ -3251,12 +3250,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
static void tcp_ack_probe(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *head = tcp_send_head(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Was it a usable window open? */
-
- if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+ if (!head)
+ return;
+ if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3376,7 +3377,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
tp->pred_flags = 0;
tcp_fast_path_check(sk);
- if (tcp_send_head(sk))
+ if (!tcp_write_queue_empty(sk))
tcp_slow_start_after_idle_check(sk);
if (nwin > tp->max_window) {
@@ -3397,7 +3398,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
@@ -3433,10 +3434,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
static u32 challenge_timestamp;
static unsigned int challenge_count;
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
u32 count, now;
/* First check our per-socket dupack rate limit. */
- if (__tcp_oow_rate_limited(sock_net(sk),
+ if (__tcp_oow_rate_limited(net,
LINUX_MIB_TCPACKSKIPPEDCHALLENGE,
&tp->last_oow_ack_time))
return;
@@ -3444,16 +3446,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
- u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1;
+ u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now;
- WRITE_ONCE(challenge_count, half +
- prandom_u32_max(sysctl_tcp_challenge_ack_limit));
+ WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
}
count = READ_ONCE(challenge_count);
if (count > 0) {
WRITE_ONCE(challenge_count, count - 1);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
}
@@ -3561,8 +3563,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sack_state.first_sackt = 0;
sack_state.rate = &rs;
- /* We very likely will need to access write queue head. */
- prefetchw(sk->sk_write_queue.next);
+ /* We very likely will need to access rtx queue. */
+ prefetch(sk->tcp_rtx_queue.rb_node);
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3647,6 +3649,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state);
+ tcp_rack_update_reo_wnd(sk, &rs);
+
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
/* If needed, reset TLP/RTO timer; RACK may later override this. */
@@ -3676,8 +3680,7 @@ no_queue:
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
- if (tcp_send_head(sk))
- tcp_ack_probe(sk);
+ tcp_ack_probe(sk);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3719,6 +3722,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt;
}
+static void smc_parse_options(const struct tcphdr *th,
+ struct tcp_options_received *opt_rx,
+ const unsigned char *ptr,
+ int opsize)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (th->syn && !(opsize & 1) &&
+ opsize >= TCPOLEN_EXP_SMC_BASE &&
+ get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+ opt_rx->smc_ok = 1;
+ }
+#endif
+}
+
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
@@ -3826,6 +3844,9 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true);
+ else
+ smc_parse_options(th, opt_rx, ptr,
+ opsize);
break;
}
@@ -3993,6 +4014,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk)
{
+ trace_tcp_receive_reset(sk);
+
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
@@ -4115,7 +4138,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
@@ -4150,7 +4173,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
- if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4330,7 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk)
p = rb_first(&tp->out_of_order_queue);
while (p) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = rb_to_skb(p);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
@@ -4394,7 +4417,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node **p, *q, *parent;
+ struct rb_node **p, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
@@ -4453,7 +4476,7 @@ coalesce_done:
parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(seq, TCP_SKB_CB(skb1)->seq)) {
p = &parent->rb_left;
continue;
@@ -4498,9 +4521,7 @@ insert:
merge_right:
/* Remove other segments covered by skb. */
- while ((q = rb_next(&skb->rbnode)) != NULL) {
- skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4515,7 +4536,7 @@ merge_right:
tcp_drop(sk, skb1);
}
/* If there is no skb after us, we are the last_skb ! */
- if (!q)
+ if (!skb1)
tp->ooo_last_skb = skb;
add_sack:
@@ -4701,7 +4722,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
if (list)
return !skb_queue_is_last(list, skb) ? skb->next : NULL;
- return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ return skb_rb_next(skb);
}
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4722,7 +4743,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
}
/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -4730,7 +4751,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
p = &parent->rb_left;
else
@@ -4777,7 +4798,7 @@ restart:
* overlaps to the next one.
*/
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
- (tcp_win_from_space(skb->truesize) > skb->len ||
+ (tcp_win_from_space(sk, skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start))) {
end_of_skbs = false;
break;
@@ -4849,26 +4870,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *head;
- struct rb_node *p;
u32 start, end;
- p = rb_first(&tp->out_of_order_queue);
- skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
if (!skb) {
- p = rb_last(&tp->out_of_order_queue);
- /* Note: This is possible p is NULL here. We do not
- * use rb_entry_safe(), as ooo_last_skb is valid only
- * if rbtree is not empty.
- */
- tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+ tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
return;
}
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
+ skb = skb_rb_next(skb);
/* Range is terminated when we see a gap or when
* we are at the queue end.
@@ -4911,14 +4925,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
do {
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
- tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+ tcp_drop(sk, rb_to_skb(node));
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
break;
node = prev;
} while (node);
- tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ tp->ooo_last_skb = rb_to_skb(prev);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
@@ -5093,7 +5107,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sysctl_tcp_stdurg)
+ if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
@@ -5513,20 +5527,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
security_inet_conn_established(sk, skb);
}
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- tcp_init_metrics(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
tp->lsndtime = tcp_jiffies32;
- tcp_init_buffer_space(sk);
-
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
@@ -5540,7 +5547,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
struct tcp_fastopen_cookie *cookie)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+ struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
bool syn_drop = false;
@@ -5575,9 +5582,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
- tcp_for_write_queue_from(data, sk) {
- if (data == tcp_send_head(sk) ||
- __tcp_retransmit_skb(sk, data, 1))
+ skb_rbtree_walk_from(data) {
+ if (__tcp_retransmit_skb(sk, data, 1))
break;
}
tcp_rearm_rto(sk);
@@ -5595,6 +5601,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
return false;
}
+static void smc_check_reset_syn(struct tcp_sock *tp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc && !tp->rx_opt.smc_ok)
+ tp->syn_smc = 0;
+ }
+#endif
+}
+
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th)
{
@@ -5690,10 +5706,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tp->tcp_header_len = sizeof(struct tcphdr);
}
- if (tcp_is_sack(tp) && sysctl_tcp_fack)
+ if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(tp);
- tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -5702,6 +5717,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
+ smc_check_reset_syn(tp);
+
smp_mb();
tcp_finish_connect(sk, skb);
@@ -5919,15 +5936,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (req) {
inet_csk(sk)->icsk_retransmits = 0;
reqsk_fastopen_remove(sk, req, false);
+ /* Re-arm the timer because data may have been sent out.
+ * This is similar to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more aggressive and
+ * retransmitting any data sooner based on when they
+ * are sent out.
+ */
+ tcp_rearm_rto(sk);
} else {
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
-
- tcp_mtup_init(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->copied_seq = tp->rcv_nxt;
- tcp_init_buffer_space(sk);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5947,19 +5967,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- if (req) {
- /* Re-arm the timer because data may have been sent out.
- * This is similar to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more aggressive and
- * retransmitting any data sooner based on when they
- * are sent out.
- */
- tcp_rearm_rto(sk);
- } else
- tcp_init_metrics(sk);
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
@@ -6056,6 +6063,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
+ /* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
/* RFC 793 says to queue data in these states,
@@ -6164,6 +6172,9 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb);
+#if IS_ENABLED(CONFIG_SMC)
+ ireq->smc_ok = rx_opt->smc_ok;
+#endif
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
@@ -6177,7 +6188,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
struct inet_request_sock *ireq = inet_rsk(req);
kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
+ ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
#endif
@@ -6339,7 +6350,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_openreq_init_rwin(req, sk, dst);
if (!want_cookie) {
tcp_reqsk_record_syn(sk, req, skb);
- fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);
+ fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
}
if (fastopen_sk) {
af_ops->send_synack(fastopen_sk, dst, &fl, req,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad3b5bb..0162c57 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -85,6 +85,8 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
+#include <trace/events/tcp.h>
+
#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
__be32 daddr, __be32 saddr, const struct tcphdr *th);
@@ -480,7 +482,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
TCP_TIMEOUT_INIT;
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
BUG_ON(!skb);
tcp_mstamp_refresh(tp);
@@ -701,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
*/
- if (sk)
+ if (sk) {
arg.bound_dev_if = sk->sk_bound_dev_if;
+ trace_tcp_send_reset(sk, skb);
+ }
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
@@ -877,7 +881,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -889,7 +893,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
#ifdef CONFIG_TCP_MD5SIG
@@ -1265,10 +1269,11 @@ static void tcp_v4_init_req(struct request_sock *req,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
+ struct net *net = sock_net(sk_listener);
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->opt = tcp_v4_save_options(sock_net(sk_listener), skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1355,10 +1360,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newsk->sk_bound_dev_if = ireq->ir_iif;
- newinet->inet_saddr = ireq->ir_loc_addr;
- inet_opt = ireq->opt;
- rcu_assign_pointer(newinet->inet_opt, inet_opt);
- ireq->opt = NULL;
+ newinet->inet_saddr = ireq->ir_loc_addr;
+ inet_opt = rcu_dereference(ireq->ireq_opt);
+ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
@@ -1403,9 +1407,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
- if (*own_req)
+ if (likely(*own_req)) {
tcp_move_syn(newtp, req);
-
+ ireq->ireq_opt = NULL;
+ } else {
+ newinet->inet_opt = NULL;
+ }
return newsk;
exit_overflow:
@@ -1416,6 +1423,7 @@ exit:
tcp_listendrop(sk);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto exit;
@@ -1503,23 +1511,23 @@ csum_err:
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
-void tcp_v4_early_demux(struct sk_buff *skb)
+int tcp_v4_early_demux(struct sk_buff *skb)
{
const struct iphdr *iph;
const struct tcphdr *th;
struct sock *sk;
if (skb->pkt_type != PACKET_HOST)
- return;
+ return 0;
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
- return;
+ return 0;
iph = ip_hdr(skb);
th = tcp_hdr(skb);
if (th->doff < sizeof(struct tcphdr) / 4)
- return;
+ return 0;
sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
iph->saddr, th->source,
@@ -1538,6 +1546,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
skb_dst_set_noref(skb, dst);
}
}
+ return 0;
}
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
@@ -1778,8 +1787,9 @@ do_time_wait:
refcounted = false;
goto process;
}
- /* Fall through to ACK */
}
+ /* to ACK */
+ /* fall through */
case TCP_TW_ACK:
tcp_v4_timewait_ack(sk, skb);
break;
@@ -1859,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ trace_tcp_destroy_sock(sk);
+
tcp_clear_xmit_timers(sk);
tcp_cleanup_congestion_control(sk);
@@ -1891,6 +1903,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
+ tcp_fastopen_destroy_cipher(sk);
tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
@@ -2471,6 +2484,31 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
+ net->ipv4.sysctl_tcp_early_retrans = 3;
+ net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION;
+ net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */
+ net->ipv4.sysctl_tcp_retrans_collapse = 1;
+ net->ipv4.sysctl_tcp_max_reordering = 300;
+ net->ipv4.sysctl_tcp_dsack = 1;
+ net->ipv4.sysctl_tcp_app_win = 31;
+ net->ipv4.sysctl_tcp_adv_win_scale = 1;
+ net->ipv4.sysctl_tcp_frto = 2;
+ net->ipv4.sysctl_tcp_moderate_rcvbuf = 1;
+ /* This limits the percentage of the congestion window which we
+ * will allow a single TSO frame to consume. Building TSO frames
+ * which are too large can cause TCP streams to be bursty.
+ */
+ net->ipv4.sysctl_tcp_tso_win_divisor = 3;
+ /* Default TSQ limit of four TSO segments */
+ net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+ /* rfc5961 challenge ack rate limiting */
+ net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
+ net->ipv4.sysctl_tcp_min_tso_segs = 2;
+ net->ipv4.sysctl_tcp_min_rtt_wlen = 300;
+ net->ipv4.sysctl_tcp_autocorking = 1;
+ net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2;
+ net->ipv4.sysctl_tcp_pacing_ss_ratio = 200;
+ net->ipv4.sysctl_tcp_pacing_ca_ratio = 120;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0ab78ab..9d5ddeb 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/jiffies.h>
@@ -20,8 +21,6 @@
#include <net/tcp.h>
#include <net/genetlink.h>
-int sysctl_tcp_nometrics_save __read_mostly;
-
static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr,
const struct inetpeer_addr *daddr,
struct net *net, unsigned int hash);
@@ -330,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
int m;
sk_dst_confirm(sk);
- if (sysctl_tcp_nometrics_save || !dst)
+ if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
return;
rcu_read_lock();
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 188a6f3..4bb8658 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -23,13 +23,12 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/workqueue.h>
+#include <linux/static_key.h>
#include <net/tcp.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
#include <net/busy_poll.h>
-int sysctl_tcp_abort_on_overflow __read_mostly;
-
static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
{
if (seq == s_win)
@@ -180,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (sysctl_tcp_rfc1337 == 0) {
+ if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -298,8 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
key = tp->af_specific->md5_lookup(sk, sk);
if (key) {
tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC);
- if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool())
- BUG();
+ BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool());
}
} while (0);
#endif
@@ -371,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req,
full_space = rcv_wnd * mss;
/* tcp_full_space because it is guaranteed to be the first packet */
- tcp_select_initial_window(full_space,
+ tcp_select_initial_window(sk_listener, full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
&req->rsk_rcv_wnd,
&req->rsk_window_clamp,
@@ -417,6 +415,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
}
EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
+static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
+ struct request_sock *req,
+ struct tcp_sock *newtp)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ struct inet_request_sock *ireq;
+
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ ireq = inet_rsk(req);
+ if (oldtp->syn_smc && !ireq->smc_ok)
+ newtp->syn_smc = 0;
+ }
+#endif
+}
+
/* This is not only more efficient than what we used to do, it eliminates
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
*
@@ -434,6 +447,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk);
struct tcp_sock *newtp = tcp_sk(newsk);
+ struct tcp_sock *oldtp = tcp_sk(sk);
+
+ smc_check_reset_syn_req(oldtp, req, newtp);
/* Now setup tcp_sock */
newtp->pred_flags = 0;
@@ -446,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
INIT_LIST_HEAD(&newtp->tsq_node);
+ INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
tcp_init_wl(newtp, treq->rcv_isn);
@@ -493,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
- if (sysctl_tcp_fack)
+ if (sock_net(sk)->ipv4.sysctl_tcp_fack)
tcp_enable_fack(newtp);
}
newtp->window_clamp = req->rsk_window_clamp;
@@ -534,6 +551,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->syn_data_acked = 0;
newtp->rack.mstamp = 0;
newtp->rack.advanced = 0;
+ newtp->rack.reo_wnd_steps = 1;
+ newtp->rack.last_delivered = 0;
+ newtp->rack.reo_wnd_persist = 0;
+ newtp->rack.dsack_seen = 0;
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
@@ -764,7 +785,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
- if (!sysctl_tcp_abort_on_overflow) {
+ if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1;
return NULL;
}
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 1ff7398..0b5a05b 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -39,7 +39,7 @@
* nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected
* nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8
* nv_rtt_factor RTT averaging factor
- * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur
+ * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur
* nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd
* nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd
* nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping
@@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2;
static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
-static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */
+static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */
static int nv_cwnd_growth_rate_neg __read_mostly = 8;
static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
static int nv_dec_eval_min_calls __read_mostly = 60;
@@ -101,6 +101,11 @@ struct tcpnv {
u32 nv_last_rtt; /* last rtt */
u32 nv_min_rtt; /* active min rtt. Used to determine slope */
u32 nv_min_rtt_new; /* min rtt for future use */
+ u32 nv_base_rtt; /* If non-zero it represents the threshold for
+ * congestion */
+ u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is
+ * set to 80% of nv_base_rtt. It helps reduce
+ * unfairness between flows */
u32 nv_rtt_max_rate; /* max rate seen during current RTT */
u32 nv_rtt_start_seq; /* current RTT ends when packet arrives
* acking beyond nv_rtt_start_seq */
@@ -132,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
static void tcpnv_init(struct sock *sk)
{
struct tcpnv *ca = inet_csk_ca(sk);
+ int base_rtt;
tcpnv_reset(ca, sk);
+ /* See if base_rtt is available from socket_ops bpf program.
+ * It is meant to be used in environments, such as communication
+ * within a datacenter, where we have reasonable estimates of
+ * RTTs
+ */
+ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+ if (base_rtt > 0) {
+ ca->nv_base_rtt = base_rtt;
+ ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
+ } else {
+ ca->nv_base_rtt = 0;
+ ca->nv_lower_bound_rtt = 0;
+ }
+
ca->nv_allow_cwnd_growth = 1;
ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
ca->nv_min_rtt = NV_INIT_RTT;
@@ -144,6 +164,19 @@ static void tcpnv_init(struct sock *sk)
ca->cwnd_growth_factor = 0;
}
+/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt)
+ * bounds to RTT.
+ */
+inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val)
+{
+ if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
+ return ca->nv_lower_bound_rtt;
+ else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
+ return ca->nv_base_rtt;
+ else
+ return val;
+}
+
static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -209,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
struct tcp_sock *tp = tcp_sk(sk);
struct tcpnv *ca = inet_csk_ca(sk);
unsigned long now = jiffies;
- s64 rate64 = 0;
+ u64 rate64;
u32 rate, max_win, cwnd_by_slope;
u32 avg_rtt;
u32 bytes_acked = 0;
@@ -251,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
}
/* rate in 100's bits per second */
- rate64 = ((u64)sample->in_flight) * 8000000;
- rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100));
+ rate64 = ((u64)sample->in_flight) * 80000;
+ do_div(rate64, avg_rtt ?: 1);
+ rate = (u32)rate64;
/* Remember the maximum rate seen during this RTT
* Note: It may be more than one RTT. This function should be
@@ -265,6 +299,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
if (ca->nv_eval_call_cnt < 255)
ca->nv_eval_call_cnt++;
+ /* Apply bounds to rtt. Only used to update min_rtt */
+ avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);
+
/* update min rtt if necessary */
if (avg_rtt < ca->nv_min_rtt)
ca->nv_min_rtt = avg_rtt;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0bc9e46..a9d917e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -41,40 +41,25 @@
#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
+#include <linux/static_key.h>
-/* People can turn this off for buggy TCP's found in printers etc. */
-int sysctl_tcp_retrans_collapse __read_mostly = 1;
-
-/* People can turn this on to work with those rare, broken TCPs that
- * interpret the window field as a signed quantity.
- */
-int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
-
-/* Default TSQ limit of four TSO segments */
-int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
-
-/* This limits the percentage of the congestion window which we
- * will allow a single TSO frame to consume. Building TSO frames
- * which are too large can cause TCP streams to be bursty.
- */
-int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-
-/* By default, RFC2861 behavior. */
-int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+#include <trace/events/tcp.h>
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
/* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
- tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
tp->packets_out += tcp_skb_pcount(skb);
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
@@ -203,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss)
* be a multiple of mss if possible. We assume here that mss >= 1.
* This MUST be enforced by all callers.
*/
-void tcp_select_initial_window(int __space, __u32 mss,
+void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
@@ -227,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
- if (sysctl_tcp_workaround_signed_windows)
+ if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = space;
@@ -287,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk)
/* Make sure we do not exceed the maximum possible
* scaled window.
*/
- if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
+ if (!tp->rx_opt.rcv_wscale &&
+ sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -395,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum = 0;
TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
@@ -418,6 +403,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
+#define OPTION_SMC (1 << 9)
+
+static void smc_options_write(__be32 *ptr, u16 *options)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (unlikely(OPTION_SMC & *options)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_EXP << 8) |
+ (TCPOLEN_EXP_SMC_BASE));
+ *ptr++ = htonl(TCPOPT_SMC_MAGIC);
+ }
+ }
+#endif
+}
struct tcp_out_options {
u16 options; /* bit field of OPTION_* */
@@ -536,6 +537,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
}
ptr += (len + 3) >> 2;
}
+
+ smc_options_write(ptr, &options);
+}
+
+static void smc_set_option(const struct tcp_sock *tp,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc) {
+ if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+ }
+ }
+#endif
+}
+
+static void smc_set_option_cond(const struct tcp_sock *tp,
+ const struct inet_request_sock *ireq,
+ struct tcp_out_options *opts,
+ unsigned int *remaining)
+{
+#if IS_ENABLED(CONFIG_SMC)
+ if (static_branch_unlikely(&tcp_have_smc)) {
+ if (tp->syn_smc && ireq->smc_ok) {
+ if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+ opts->options |= OPTION_SMC;
+ *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
+ }
+ }
+ }
+#endif
}
/* Compute TCP options for SYN packets. This is not the final
@@ -603,11 +639,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
}
+ smc_set_option(tp, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
/* Set up TCP options for SYN-ACKs. */
-static unsigned int tcp_synack_options(struct request_sock *req,
+static unsigned int tcp_synack_options(const struct sock *sk,
+ struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5,
@@ -663,6 +702,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
}
}
+ smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+
return MAX_TCP_OPTION_SPACE - remaining;
}
@@ -739,8 +780,10 @@ static void tcp_tsq_handler(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tp->lost_out > tp->retrans_out &&
- tp->snd_cwnd > tcp_packets_in_flight(tp))
+ tp->snd_cwnd > tcp_packets_in_flight(tp)) {
+ tcp_mstamp_refresh(tp);
tcp_xmit_retransmit_queue(sk);
+ }
tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
0, GFP_ATOMIC);
@@ -971,6 +1014,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
HRTIMER_MODE_ABS_PINNED);
}
+static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+{
+ skb->skb_mstamp = tp->tcp_mstamp;
+ list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+}
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@@ -1003,10 +1052,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
oskb = skb;
- if (unlikely(skb_cloned(skb)))
- skb = pskb_copy(skb, gfp_mask);
- else
- skb = skb_clone(skb, gfp_mask);
+
+ tcp_skb_tsorted_save(oskb) {
+ if (unlikely(skb_cloned(oskb)))
+ skb = pskb_copy(oskb, gfp_mask);
+ else
+ skb = skb_clone(oskb, gfp_mask);
+ } tcp_skb_tsorted_restore(oskb);
+
if (unlikely(!skb))
return -ENOBUFS;
}
@@ -1127,7 +1180,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
err = net_xmit_eval(err);
}
if (!err && oskb) {
- oskb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, oskb);
tcp_rate_skb_sent(sk, oskb);
}
return err;
@@ -1239,12 +1292,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
TCP_SKB_CB(skb)->eor = 0;
}
+/* Insert buff after skb on the write or rtx queue of sk. */
+static void tcp_insert_write_queue_after(struct sk_buff *skb,
+ struct sk_buff *buff,
+ struct sock *sk,
+ enum tcp_queue tcp_queue)
+{
+ if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+ __skb_queue_after(&sk->sk_write_queue, skb, buff);
+ else
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point.
*/
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, u32 len,
unsigned int mss_now, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1327,7 +1393,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+ if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE)
+ list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
return 0;
}
@@ -1605,7 +1673,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sysctl_tcp_slow_start_after_idle &&
+ if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1614,10 +1682,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
* is caused by insufficient sender buffer:
* 1) just sent some data (see tcp_write_xmit)
* 2) not cwnd limited (this else condition)
- * 3) no more data to send (null tcp_send_head )
+ * 3) no more data to send (tcp_write_queue_empty())
* 4) application is hitting buffer limit (SOCK_NOSPACE)
*/
- if (!tcp_send_head(sk) && sk->sk_socket &&
+ if (tcp_write_queue_empty(sk) && sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
(1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1692,7 +1760,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
return tso_segs ? :
- tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+ tcp_tso_autosize(sk, mss_now,
+ sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
}
/* Returns the portion of skb which can be sent right away */
@@ -1813,7 +1882,8 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
* know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned).
*/
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, unsigned int len,
unsigned int mss_now, gfp_t gfp)
{
struct sk_buff *buff;
@@ -1822,7 +1892,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, skb, len, mss_now, gfp);
+ return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
@@ -1858,7 +1928,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
return 0;
}
@@ -1908,7 +1978,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
goto send_now;
- win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+ win_divisor = ACCESS_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor);
if (win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
@@ -1928,8 +1998,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
goto send_now;
}
- head = tcp_write_queue_head(sk);
-
+ /* TODO : use tsorted_sent_queue ? */
+ head = tcp_rtx_queue_head(sk);
+ if (!head)
+ goto send_now;
age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
@@ -2060,6 +2132,7 @@ static int tcp_mtu_probe(struct sock *sk)
nskb->ip_summed = skb->ip_summed;
tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2143,17 +2216,17 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
unsigned int limit;
limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
- limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ limit = min_t(u32, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send the 1st or 2nd skb in write queue.
+ /* Always send skb if rtx queue is empty.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (skb == sk->sk_write_queue.next ||
- skb->prev == sk->sk_write_queue.next)
+ if (tcp_rtx_queue_empty(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2204,7 +2277,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
* it's the "most interesting" or current chrono we are
* tracking and starts busy chrono if we have pending data.
*/
- if (tcp_write_queue_empty(sk))
+ if (tcp_rtx_and_write_queues_empty(sk))
tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
else if (type == tp->chrono_type)
tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@ -2237,6 +2310,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
sent_pkts = 0;
+ tcp_mstamp_refresh(tp);
if (!push_one) {
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
@@ -2248,7 +2322,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
}
max_segs = tcp_tso_segs(sk, mss_now);
- tcp_mstamp_refresh(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
@@ -2260,7 +2333,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, skb);
goto repair; /* Skip network transmission */
}
@@ -2299,7 +2372,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle);
if (skb->len > limit &&
- unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+ unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, limit, mss_now, gfp)))
break;
if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2339,7 +2413,7 @@ repair:
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
- return !tp->packets_out && tcp_send_head(sk);
+ return !tp->packets_out && !tcp_write_queue_empty(sk);
}
bool tcp_schedule_loss_probe(struct sock *sk)
@@ -2347,6 +2421,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, rto_delta_us;
+ int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
@@ -2354,16 +2429,17 @@ bool tcp_schedule_loss_probe(struct sock *sk)
if (tp->fastopen_rsk)
return false;
+ early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+ if ((early_retrans != 3 && early_retrans != 4) ||
!tp->packets_out || !tcp_is_sack(tp) ||
icsk->icsk_ca_state != TCP_CA_Open)
return false;
if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
- tcp_send_head(sk))
+ !tcp_write_queue_empty(sk))
return false;
/* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2416,18 +2492,14 @@ void tcp_send_loss_probe(struct sock *sk)
int mss = tcp_current_mss(sk);
skb = tcp_send_head(sk);
- if (skb) {
- if (tcp_snd_wnd_test(tp, skb, mss)) {
- pcount = tp->packets_out;
- tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
- if (tp->packets_out > pcount)
- goto probe_sent;
- goto rearm_timer;
- }
- skb = tcp_write_queue_prev(sk, skb);
- } else {
- skb = tcp_write_queue_tail(sk);
+ if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+ pcount = tp->packets_out;
+ tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+ if (tp->packets_out > pcount)
+ goto probe_sent;
+ goto rearm_timer;
}
+ skb = skb_rb_last(&sk->tcp_rtx_queue);
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
@@ -2445,10 +2517,11 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
- if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+ if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ (pcount - 1) * mss, mss,
GFP_ATOMIC)))
goto rearm_timer;
- skb = tcp_write_queue_next(sk, skb);
+ skb = skb_rb_next(skb);
}
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2648,7 +2721,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+ struct sk_buff *next_skb = skb_rb_next(skb);
int skb_size, next_skb_size;
skb_size = skb->len;
@@ -2663,9 +2736,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
else if (!skb_shift(skb, next_skb, next_skb_size))
return false;
}
- tcp_highest_sack_combine(sk, next_skb, skb);
-
- tcp_unlink_write_queue(next_skb, sk);
+ tcp_highest_sack_replace(sk, next_skb, skb);
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -2694,7 +2765,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
tcp_skb_collapse_tstamp(skb, next_skb);
- sk_wmem_free_skb(sk, next_skb);
+ tcp_rtx_queue_unlink_and_free(next_skb, sk);
return true;
}
@@ -2705,8 +2776,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
return false;
if (skb_cloned(skb))
return false;
- if (skb == tcp_send_head(sk))
- return false;
/* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
@@ -2724,12 +2793,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sysctl_tcp_retrans_collapse)
+ if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
- tcp_for_write_queue_from_safe(skb, tmp, sk) {
+ skb_rbtree_walk_from_safe(skb, tmp) {
if (!tcp_can_collapse(sk, skb))
break;
@@ -2804,7 +2873,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
len = cur_mss * segs;
if (skb->len > len) {
- if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+ cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
if (skb_unclone(skb, GFP_ATOMIC))
@@ -2838,17 +2908,23 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
- if (!err)
- skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_skb_tsorted_save(skb) {
+ nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
+ } tcp_skb_tsorted_restore(skb);
+
+ if (!err) {
+ tcp_update_skb_after_send(tp, skb);
+ tcp_rate_skb_sent(sk, skb);
+ }
} else {
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
@@ -2892,29 +2968,21 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
void tcp_xmit_retransmit_queue(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *skb, *rtx_head, *hole = NULL;
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
- struct sk_buff *hole = NULL;
u32 max_segs;
int mib_idx;
if (!tp->packets_out)
return;
- if (tp->retransmit_skb_hint) {
- skb = tp->retransmit_skb_hint;
- } else {
- skb = tcp_write_queue_head(sk);
- }
-
+ rtx_head = tcp_rtx_queue_head(sk);
+ skb = tp->retransmit_skb_hint ?: rtx_head;
max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
__u8 sacked;
int segs;
- if (skb == tcp_send_head(sk))
- break;
-
if (tcp_pacing_check(sk))
break;
@@ -2959,7 +3027,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
- if (skb == tcp_write_queue_head(sk) &&
+ if (skb == rtx_head &&
icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
@@ -3001,12 +3069,15 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
- if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+ if (!tskb && tcp_under_memory_pressure(sk))
+ tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+ if (tskb) {
coalesce:
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
- if (!tcp_send_head(sk)) {
+ if (tcp_write_queue_empty(sk)) {
/* This means tskb was already sent.
* Pretend we included the FIN on previous transmit.
* We need to set tp->snd_nxt to the value it would have
@@ -3023,6 +3094,7 @@ coalesce:
goto coalesce;
return;
}
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
skb_reserve(skb, MAX_TCP_HEADER);
sk_forced_mem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@ -3059,6 +3131,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* Send it off. */
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
+
+ /* skb of trace_tcp_send_reset() keeps the skb that caused RST,
+ * skb here is different to the troublesome skb, so use NULL
+ */
+ trace_tcp_send_reset(sk, NULL);
}
/* Send a crossed SYN-ACK during socket establishment.
@@ -3071,20 +3148,24 @@ int tcp_send_synack(struct sock *sk)
{
struct sk_buff *skb;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
- pr_debug("%s: wrong queue state\n", __func__);
+ pr_err("%s: wrong queue state\n", __func__);
return -EFAULT;
}
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
if (skb_cloned(skb)) {
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ struct sk_buff *nskb;
+
+ tcp_skb_tsorted_save(skb) {
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ } tcp_skb_tsorted_restore(skb);
if (!nskb)
return -ENOMEM;
- tcp_unlink_write_queue(skb, sk);
+ INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
- __tcp_add_write_queue_head(sk, nskb);
- sk_wmem_free_skb(sk, skb);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
sk->sk_wmem_queued += nskb->truesize;
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
@@ -3161,8 +3242,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
- tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
- sizeof(*th);
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+ foc) + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -3175,13 +3256,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
skb->mark = ireq->ir_mark;
- /* Setting of flags are superfluous here for callers (and ECE is
- * not even correctly set)
- */
- tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
- TCPHDR_SYN | TCPHDR_ACK);
-
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ th->seq = htonl(tcp_rsk(req)->snt_isn);
/* XXX data is queued and acked as is. No buffer/window check */
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
@@ -3268,7 +3344,7 @@ static void tcp_connect_init(struct sock *sk)
if (rcv_wnd == 0)
rcv_wnd = dst_metric(dst, RTAX_INITRWND);
- tcp_select_initial_window(tcp_full_space(sk),
+ tcp_select_initial_window(sk, tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
@@ -3307,7 +3383,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
tcb->end_seq += skb->len;
__skb_header_release(skb);
- __tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
tp->write_seq = tcb->end_seq;
@@ -3355,6 +3430,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
int copied = copy_from_iter(skb_put(syn_data, space), space,
&fo->data->msg_iter);
if (unlikely(!copied)) {
+ tcp_skb_tsorted_anchor_cleanup(syn_data);
kfree_skb(syn_data);
goto fallback;
}
@@ -3385,12 +3461,13 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
if (!err) {
tp->syn_data = (fo->copied > 0);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done;
}
- /* data was not sent, this is our new send_head */
- sk->sk_send_head = syn_data;
+ /* data was not sent, put it in write_queue */
+ __skb_queue_tail(&sk->sk_write_queue, syn_data);
tp->packets_out -= tcp_skb_pcount(syn_data);
fallback:
@@ -3433,6 +3510,7 @@ int tcp_connect(struct sock *sk)
tp->retrans_stamp = tcp_time_stamp(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
/* Send off SYN; include data in Fast Open. */
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3627,7 +3705,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(skb, mss);
@@ -3657,7 +3736,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
- if (tp->packets_out || !tcp_send_head(sk)) {
+ if (tp->packets_out || tcp_write_queue_empty(sk)) {
/* Cancel probe timer, if it is not required. */
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
@@ -3698,6 +3777,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
if (unlikely(tcp_passive_fastopen(sk)))
tcp_sk(sk)->total_retrans++;
+ trace_tcp_retransmit_synack(sk, req);
}
return res;
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 449cd91..d3ea890 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -1,8 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/tcp.h>
#include <net/tcp.h>
-int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION;
-
static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -45,7 +44,8 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
+ u32 min_rtt = tcp_min_rtt(tp);
+ struct sk_buff *skb, *n;
u32 reo_wnd;
*reo_timeout = 0;
@@ -55,48 +55,36 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
* to queuing or delayed ACKs.
*/
reo_wnd = 1000;
- if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
- reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
+ if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) {
+ reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd);
+ reo_wnd = min(reo_wnd, tp->srtt_us >> 3);
+ }
- tcp_for_write_queue(skb, sk) {
+ list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
+ tcp_tsorted_anchor) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ s32 remaining;
- if (skb == tcp_send_head(sk))
- break;
-
- /* Skip ones already (s)acked */
- if (!after(scb->end_seq, tp->snd_una) ||
- scb->sacked & TCPCB_SACKED_ACKED)
+ /* Skip ones marked lost but not yet retransmitted */
+ if ((scb->sacked & TCPCB_LOST) &&
+ !(scb->sacked & TCPCB_SACKED_RETRANS))
continue;
- if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
- tp->rack.end_seq, scb->end_seq)) {
- /* Step 3 in draft-cheng-tcpm-rack-00.txt:
- * A packet is lost if its elapsed time is beyond
- * the recent RTT plus the reordering window.
- */
- u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
- skb->skb_mstamp);
- s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
-
- if (remaining < 0) {
- tcp_rack_mark_skb_lost(sk, skb);
- continue;
- }
-
- /* Skip ones marked lost but not yet retransmitted */
- if ((scb->sacked & TCPCB_LOST) &&
- !(scb->sacked & TCPCB_SACKED_RETRANS))
- continue;
+ if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
+ tp->rack.end_seq, scb->end_seq))
+ break;
+ /* A packet is lost if it has not been s/acked beyond
+ * the recent RTT plus the reordering window.
+ */
+ remaining = tp->rack.rtt_us + reo_wnd -
+ tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+ if (remaining < 0) {
+ tcp_rack_mark_skb_lost(sk, skb);
+ list_del_init(&skb->tcp_tsorted_anchor);
+ } else {
/* Record maximum wait time (+1 to avoid 0) */
*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
-
- } else if (!(scb->sacked & TCPCB_RETRANS)) {
- /* Original data are sent sequentially so stop early
- * b/c the rest are all sent after rack_sent
- */
- break;
}
}
}
@@ -175,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk)
if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS)
tcp_rearm_rto(sk);
}
+
+/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
+ *
+ * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
+ * by srtt), since there is possibility that spurious retransmission was
+ * due to reordering delay longer than reo_wnd.
+ *
+ * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
+ * no. of successful recoveries (accounts for full DSACK-based loss
+ * recovery undo). After that, reset it to default (min_rtt/4).
+ *
+ * At max, reo_wnd is incremented only once per rtt. So that the new
+ * DSACK on which we are reacting, is due to the spurious retx (approx)
+ * after the reo_wnd has been updated last time.
+ *
+ * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than
+ * absolute value to account for change in rtt.
+ */
+void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ !rs->prior_delivered)
+ return;
+
+ /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */
+ if (before(rs->prior_delivered, tp->rack.last_delivered))
+ tp->rack.dsack_seen = 0;
+
+ /* Adjust the reo_wnd if update is pending */
+ if (tp->rack.dsack_seen) {
+ tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
+ tp->rack.reo_wnd_steps + 1);
+ tp->rack.dsack_seen = 0;
+ tp->rack.last_delivered = tp->delivered;
+ tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
+ } else if (!tp->rack.reo_wnd_persist) {
+ tp->rack.reo_wnd_steps = 1;
+ }
+}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 655dd8d..16df6dd 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,8 +22,6 @@
#include <linux/gfp.h>
#include <net/tcp.h>
-int sysctl_tcp_thin_linear_timeouts __read_mostly;
-
/**
* tcp_write_err() - close socket and save error info
* @sk: The socket the error has appeared on.
@@ -109,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive)
static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
- struct net *net = sock_net(sk);
+ const struct net *net = sock_net(sk);
+ int mss;
/* Black hole detection */
- if (net->ipv4.sysctl_tcp_mtu_probing) {
- if (!icsk->icsk_mtup.enabled) {
- icsk->icsk_mtup.enabled = 1;
- icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
- tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- } else {
- struct net *net = sock_net(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- int mss;
-
- mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, 68 - tp->tcp_header_len);
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
- tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- }
+ if (!net->ipv4.sysctl_tcp_mtu_probing)
+ return;
+
+ if (!icsk->icsk_mtup.enabled) {
+ icsk->icsk_mtup.enabled = 1;
+ icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
+ } else {
+ mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
+ mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
+ mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
@@ -156,8 +151,13 @@ static bool retransmits_timed_out(struct sock *sk,
return false;
start_ts = tcp_sk(sk)->retrans_stamp;
- if (unlikely(!start_ts))
- start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
+ if (unlikely(!start_ts)) {
+ struct sk_buff *head = tcp_rtx_queue_head(sk);
+
+ if (!head)
+ return false;
+ start_ts = tcp_skb_timestamp(head);
+ }
if (likely(timeout == 0)) {
linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -283,15 +283,17 @@ out:
*
* Returns: Nothing (void)
*/
-static void tcp_delack_timer(unsigned long data)
+static void tcp_delack_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_delack_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_delack_timer_handler(sk);
} else {
- inet_csk(sk)->icsk_ack.blocked = 1;
+ icsk->icsk_ack.blocked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* deleguate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
@@ -304,11 +306,12 @@ static void tcp_delack_timer(unsigned long data)
static void tcp_probe_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *skb = tcp_send_head(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
u32 start_ts;
- if (tp->packets_out || !tcp_send_head(sk)) {
+ if (tp->packets_out || !skb) {
icsk->icsk_probes_out = 0;
return;
}
@@ -321,9 +324,9 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+ start_ts = tcp_skb_timestamp(skb);
if (!start_ts)
- tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
+ skb->skb_mstamp = tp->tcp_mstamp;
else if (icsk->icsk_user_timeout &&
(s32)(tcp_time_stamp(tp) - start_ts) >
jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -408,7 +411,7 @@ void tcp_retransmit_timer(struct sock *sk)
if (!tp->packets_out)
goto out;
- WARN_ON(tcp_write_queue_empty(sk));
+ WARN_ON(tcp_rtx_queue_empty(sk));
tp->tlp_high_seq = 0;
@@ -441,7 +444,7 @@ void tcp_retransmit_timer(struct sock *sk)
goto out;
}
tcp_enter_loss(sk);
- tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
+ tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
__sk_dst_reset(sk);
goto out_reset_timer;
}
@@ -473,7 +476,7 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk);
- if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
+ if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
/* Retransmission failed because of local congestion,
* do not backoff.
*/
@@ -514,7 +517,7 @@ out_reset_timer:
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
@@ -570,9 +573,11 @@ out:
sk_mem_reclaim(sk);
}
-static void tcp_write_timer(unsigned long data)
+static void tcp_write_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_retransmit_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
@@ -607,9 +612,9 @@ void tcp_set_keepalive(struct sock *sk, int val)
EXPORT_SYMBOL_GPL(tcp_set_keepalive);
-static void tcp_keepalive_timer (unsigned long data)
+static void tcp_keepalive_timer (struct timer_list *t)
{
- struct sock *sk = (struct sock *) data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 elapsed;
@@ -647,7 +652,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
- if (tp->packets_out || tcp_send_head(sk))
+ if (tp->packets_out || !tcp_write_queue_empty(sk))
goto resched;
elapsed = keepalive_time_elapsed(tp);
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 248cfc0..4f24d0e 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* TCP Vegas congestion control interface
*/
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 784ced0..a6699af 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
}
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
/**
@@ -1061,7 +1058,7 @@ back_from_confirm:
/* ... which is an evident application bug. --ANK */
release_sock(sk);
- net_dbg_ratelimited("cork app bug 2\n");
+ net_dbg_ratelimited("socket already corked\n");
err = -EINVAL;
goto out;
}
@@ -1144,7 +1141,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
if (unlikely(!up->pending)) {
release_sock(sk);
- net_dbg_ratelimited("udp cork app bug 3\n");
+ net_dbg_ratelimited("cork failed\n");
return -EINVAL;
}
@@ -2220,9 +2217,10 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net,
return NULL;
}
-void udp_v4_early_demux(struct sk_buff *skb)
+int udp_v4_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct in_device *in_dev = NULL;
const struct iphdr *iph;
const struct udphdr *uh;
struct sock *sk = NULL;
@@ -2233,25 +2231,21 @@ void udp_v4_early_demux(struct sk_buff *skb)
/* validate the packet */
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
- return;
+ return 0;
iph = ip_hdr(skb);
uh = udp_hdr(skb);
- if (skb->pkt_type == PACKET_BROADCAST ||
- skb->pkt_type == PACKET_MULTICAST) {
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ if (skb->pkt_type == PACKET_MULTICAST) {
+ in_dev = __in_dev_get_rcu(skb->dev);
if (!in_dev)
- return;
+ return 0;
- /* we are supposed to accept bcast packets */
- if (skb->pkt_type == PACKET_MULTICAST) {
- ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
- iph->protocol);
- if (!ours)
- return;
- }
+ ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+ iph->protocol);
+ if (!ours)
+ return 0;
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr,
@@ -2262,7 +2256,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
}
if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
- return;
+ return 0;
skb->sk = sk;
skb->destructor = sock_efree;
@@ -2271,12 +2265,23 @@ void udp_v4_early_demux(struct sk_buff *skb)
if (dst)
dst = dst_check(dst, 0);
if (dst) {
+ u32 itag = 0;
+
/* set noref for now.
* any place which wants to hold dst has to call
* dst_hold_safe()
*/
skb_dst_set_noref(skb, dst);
+
+ /* for unconnected multicast sockets we need to validate
+ * the source on each packet
+ */
+ if (!inet_sk(sk)->inet_daddr && in_dev)
+ return ip_mc_validate_source(skb, iph->daddr,
+ iph->saddr, iph->tos,
+ skb->dev, in_dev, &itag);
}
+ return 0;
}
int udp_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index a8cf8c6..e7d18b1 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _UDP4_IMPL_H
#define _UDP4_IMPL_H
#include <net/udp.h>
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 97658bf..e360d55 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -120,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
* will be using a length value equal to only one MSS sized
* segment instead of the entire frame.
*/
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
uh->len = htons(skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)uh);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 1fc6841..e50b7fe 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm4_input.c
*
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d7bf0b0..05017e2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm4_policy.c
*
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d6660a8..80c40b4 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm4_state.c
*
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 10e3423..e0026fa 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux TCP/IP (INET6) layer.
#
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f553f72..6233e06f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -152,11 +152,13 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
-static int ipv6_count_addresses(struct inet6_dev *idev);
+static int ipv6_count_addresses(const struct inet6_dev *idev);
static int ipv6_generate_stable_address(struct in6_addr *addr,
u8 dad_count,
const struct inet6_dev *idev);
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT)
/*
* Configured unicast address hash table
*/
@@ -192,8 +194,6 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void inet6_prefix_notify(int event, struct inet6_dev *idev,
struct prefix_info *pinfo);
-static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev);
static struct ipv6_devconf ipv6_devconf __read_mostly = {
.forwarding = 0,
@@ -616,23 +616,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
+ struct inet6_dev *in6_dev = NULL;
+ struct net_device *dev = NULL;
struct netconfmsg *ncm;
struct sk_buff *skb;
struct ipv6_devconf *devconf;
- struct inet6_dev *in6_dev;
- struct net_device *dev;
int ifindex;
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
devconf_ipv6_policy, extack);
if (err < 0)
- goto errout;
+ return err;
- err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
- goto errout;
+ return -EINVAL;
+ err = -EINVAL;
ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
switch (ifindex) {
case NETCONFA_IFINDEX_ALL:
@@ -642,10 +642,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
devconf = net->ipv6.devconf_dflt;
break;
default:
- dev = __dev_get_by_index(net, ifindex);
+ dev = dev_get_by_index(net, ifindex);
if (!dev)
- goto errout;
- in6_dev = __in6_dev_get(dev);
+ return -EINVAL;
+ in6_dev = in6_dev_get(dev);
if (!in6_dev)
goto errout;
devconf = &in6_dev->cnf;
@@ -653,7 +653,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
if (!skb)
goto errout;
@@ -669,6 +669,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
}
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
+ if (in6_dev)
+ in6_dev_put(in6_dev);
+ if (dev)
+ dev_put(dev);
return err;
}
@@ -945,12 +949,50 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
break;
}
- list_add_tail(&ifp->if_list, p);
+ list_add_tail_rcu(&ifp->if_list, p);
+}
+
+static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
-static u32 inet6_addr_hash(const struct in6_addr *addr)
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+ struct net_device *dev, unsigned int hash)
{
- return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
+ struct inet6_ifaddr *ifp;
+
+ hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
+ if (!net_eq(dev_net(ifp->idev->dev), net))
+ continue;
+ if (ipv6_addr_equal(&ifp->addr, addr)) {
+ if (!dev || ifp->idev->dev == dev)
+ return true;
+ }
+ }
+ return false;
+}
+
+static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
+{
+ unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr);
+ int err = 0;
+
+ spin_lock(&addrconf_hash_lock);
+
+ /* Ignore adding duplicate addresses on an interface */
+ if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
+ ADBG("ipv6_add_addr: already assigned\n");
+ err = -EEXIST;
+ } else {
+ hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+ }
+
+ spin_unlock(&addrconf_hash_lock);
+
+ return err;
}
/* On success it returns ifp with increased reference count */
@@ -958,13 +1000,13 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
static struct inet6_ifaddr *
ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
const struct in6_addr *peer_addr, int pfxlen,
- int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
+ int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
+ bool can_block, struct netlink_ext_ack *extack)
{
+ gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
- struct rt6_info *rt;
- struct in6_validator_info i6vi;
- unsigned int hash;
+ struct rt6_info *rt = NULL;
int err = 0;
int addr_type = ipv6_addr_type(addr);
@@ -974,42 +1016,33 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
addr_type & IPV6_ADDR_LOOPBACK))
return ERR_PTR(-EADDRNOTAVAIL);
- rcu_read_lock_bh();
-
- in6_dev_hold(idev);
-
if (idev->dead) {
err = -ENODEV; /*XXX*/
- goto out2;
+ goto out;
}
if (idev->cnf.disable_ipv6) {
err = -EACCES;
- goto out2;
- }
-
- i6vi.i6vi_addr = *addr;
- i6vi.i6vi_dev = idev;
- rcu_read_unlock_bh();
-
- err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
-
- rcu_read_lock_bh();
- err = notifier_to_errno(err);
- if (err)
- goto out2;
-
- spin_lock(&addrconf_hash_lock);
-
- /* Ignore adding duplicate addresses on an interface */
- if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG("ipv6_add_addr: already assigned\n");
- err = -EEXIST;
goto out;
}
- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+ /* validator notifier needs to be blocking;
+ * do not call in atomic context
+ */
+ if (can_block) {
+ struct in6_validator_info i6vi = {
+ .i6vi_addr = *addr,
+ .i6vi_dev = idev,
+ .extack = extack,
+ };
+
+ err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
+ err = notifier_to_errno(err);
+ if (err < 0)
+ goto out;
+ }
+ ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
@@ -1019,6 +1052,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
rt = addrconf_dst_alloc(idev, addr, false);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
+ rt = NULL;
goto out;
}
@@ -1049,16 +1083,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa->rt = rt;
ifa->idev = idev;
+ in6_dev_hold(idev);
+
/* For caller */
refcount_set(&ifa->refcnt, 1);
- /* Add to big hash table */
- hash = inet6_addr_hash(addr);
+ rcu_read_lock_bh();
- hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
- spin_unlock(&addrconf_hash_lock);
+ err = ipv6_add_addr_hash(idev->dev, ifa);
+ if (err < 0) {
+ rcu_read_unlock_bh();
+ goto out;
+ }
write_lock(&idev->lock);
+
/* Add to inet6_dev unicast addr list. */
ipv6_link_dev_addr(idev, ifa);
@@ -1069,21 +1108,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
in6_ifa_hold(ifa);
write_unlock(&idev->lock);
-out2:
+
rcu_read_unlock_bh();
- if (likely(err == 0))
- inet6addr_notifier_call_chain(NETDEV_UP, ifa);
- else {
- kfree(ifa);
- in6_dev_put(idev);
+ inet6addr_notifier_call_chain(NETDEV_UP, ifa);
+out:
+ if (unlikely(err < 0)) {
+ if (rt)
+ ip6_rt_put(rt);
+ if (ifa) {
+ if (ifa->idev)
+ in6_dev_put(ifa->idev);
+ kfree(ifa);
+ }
ifa = ERR_PTR(err);
}
return ifa;
-out:
- spin_unlock(&addrconf_hash_lock);
- goto out2;
}
enum cleanup_prefix_rt_t {
@@ -1204,7 +1245,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
action = check_cleanup_prefix_route(ifp, &expires);
- list_del_init(&ifp->if_list);
+ list_del_rcu(&ifp->if_list);
__in6_ifa_put(ifp);
write_unlock_bh(&ifp->idev->lock);
@@ -1226,7 +1267,9 @@ out:
in6_ifa_put(ifp);
}
-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
+ struct inet6_ifaddr *ift,
+ bool block)
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
@@ -1330,7 +1373,7 @@ retry:
ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft);
+ tmp_valid_lft, tmp_prefered_lft, block, NULL);
if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -1558,8 +1601,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
{
struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
- read_lock_bh(&idev->lock);
- list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
int i;
/*
@@ -1609,11 +1651,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
}
break;
} else if (minihiscore < miniscore) {
- if (hiscore->ifa)
- in6_ifa_put(hiscore->ifa);
-
- in6_ifa_hold(score->ifa);
-
swap(hiscore, score);
hiscore_idx = 1 - hiscore_idx;
@@ -1625,7 +1662,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
}
}
out:
- read_unlock_bh(&idev->lock);
return hiscore_idx;
}
@@ -1662,6 +1698,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
int dst_type;
bool use_oif_addr = false;
int hiscore_idx = 0;
+ int ret = 0;
dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr;
@@ -1737,15 +1774,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
}
out:
- rcu_read_unlock();
-
hiscore = &scores[hiscore_idx];
if (!hiscore->ifa)
- return -EADDRNOTAVAIL;
+ ret = -EADDRNOTAVAIL;
+ else
+ *saddr = hiscore->ifa->addr;
- *saddr = hiscore->ifa->addr;
- in6_ifa_put(hiscore->ifa);
- return 0;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
@@ -1785,15 +1821,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
return err;
}
-static int ipv6_count_addresses(struct inet6_dev *idev)
+static int ipv6_count_addresses(const struct inet6_dev *idev)
{
+ const struct inet6_ifaddr *ifp;
int cnt = 0;
- struct inet6_ifaddr *ifp;
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
cnt++;
- read_unlock_bh(&idev->lock);
+ rcu_read_unlock();
return cnt;
}
@@ -1808,11 +1844,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict,
u32 banned_flags)
{
+ unsigned int hash = inet6_addr_hash(net, addr);
struct inet6_ifaddr *ifp;
- unsigned int hash = inet6_addr_hash(addr);
u32 ifp_flags;
- rcu_read_lock_bh();
+ rcu_read_lock();
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
@@ -1826,32 +1862,16 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
!(ifp_flags&banned_flags) &&
(!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return 1;
}
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return 0;
}
EXPORT_SYMBOL(ipv6_chk_addr_and_flags);
-static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
- struct net_device *dev)
-{
- unsigned int hash = inet6_addr_hash(addr);
- struct inet6_ifaddr *ifp;
-
- hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) {
- if (!net_eq(dev_net(ifp->idev->dev), net))
- continue;
- if (ipv6_addr_equal(&ifp->addr, addr)) {
- if (!dev || ifp->idev->dev == dev)
- return true;
- }
- }
- return false;
-}
/* Compares an address/prefix_len with addresses on device @dev.
* If one is found it returns true.
@@ -1859,20 +1879,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
const unsigned int prefix_len, struct net_device *dev)
{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
+ const struct inet6_dev *idev;
bool ret = false;
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
if (ret)
break;
}
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
@@ -1882,22 +1900,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
+ const struct inet6_dev *idev;
int onlink;
onlink = 0;
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
onlink = ipv6_prefix_equal(addr, &ifa->addr,
ifa->prefix_len);
if (onlink)
break;
}
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
return onlink;
@@ -1907,11 +1923,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
struct net_device *dev, int strict)
{
+ unsigned int hash = inet6_addr_hash(net, addr);
struct inet6_ifaddr *ifp, *result = NULL;
- unsigned int hash = inet6_addr_hash(addr);
- rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1923,7 +1939,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
}
}
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return result;
}
@@ -1942,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (ifpub) {
in6_ifa_hold(ifpub);
spin_unlock_bh(&ifp->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
} else {
spin_unlock_bh(&ifp->lock);
@@ -1975,7 +1991,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp)
return err;
}
-void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net *net = dev_net(ifp->idev->dev);
@@ -1985,8 +2001,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
return;
}
- net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n",
- ifp->idev->dev->name, &ifp->addr);
+ net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n",
+ ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source);
spin_lock_bh(&ifp->lock);
@@ -2025,7 +2041,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
scope, flags, valid_lft,
- preferred_lft);
+ preferred_lft, false, NULL);
if (IS_ERR(ifp2))
goto lock_errdad;
@@ -2321,24 +2337,24 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+ rcu_read_lock();
+ fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
if (!fn)
goto out;
- noflags |= RTF_CACHE;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
if ((rt->rt6i_flags & flags) != flags)
continue;
if ((rt->rt6i_flags & noflags) != 0)
continue;
- dst_hold(&rt->dst);
+ if (!dst_hold_safe(&rt->dst))
+ rt = NULL;
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -2442,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
* no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
- ipv6_create_tempaddr(ifp, NULL);
+ ipv6_create_tempaddr(ifp, NULL, false);
} else {
read_unlock_bh(&idev->lock);
}
@@ -2483,7 +2499,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
pinfo->prefix_len,
addr_type&IPV6_ADDR_SCOPE_MASK,
addr_flags, valid_lft,
- prefered_lft);
+ prefered_lft, false, NULL);
if (IS_ERR_OR_NULL(ifp))
return -1;
@@ -2793,7 +2809,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
const struct in6_addr *pfx,
const struct in6_addr *peer_pfx,
unsigned int plen, __u32 ifa_flags,
- __u32 prefered_lft, __u32 valid_lft)
+ __u32 prefered_lft, __u32 valid_lft,
+ struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2852,7 +2869,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
}
ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
- valid_lft, prefered_lft);
+ valid_lft, prefered_lft, true, extack);
if (!IS_ERR(ifp)) {
if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -2937,7 +2954,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
rtnl_lock();
err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
ireq.ifr6_prefixlen, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
rtnl_unlock();
return err;
}
@@ -2967,7 +2984,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifp = ipv6_add_addr(idev, addr, NULL, plen,
scope, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
+ true, NULL);
if (!IS_ERR(ifp)) {
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3067,7 +3085,7 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
#endif
ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
addrconf_dad_start(ifp);
@@ -3335,6 +3353,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
if ((ifp->flags & IFA_F_PERMANENT) &&
fixup_permanent_addr(idev, ifp) < 0) {
write_unlock_bh(&idev->lock);
+ in6_ifa_hold(ifp);
ipv6_del_addr(ifp);
write_lock_bh(&idev->lock);
@@ -3562,7 +3581,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
- struct list_head del_list;
int _keep_addr;
bool keep_addr;
int state, i;
@@ -3654,7 +3672,6 @@ restart:
*/
keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
- INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
struct rt6_info *rt = NULL;
bool keep;
@@ -3663,8 +3680,6 @@ restart:
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- if (!keep)
- list_move(&ifa->if_list, &del_list);
write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
@@ -3698,19 +3713,14 @@ restart:
}
write_lock_bh(&idev->lock);
+ if (!keep) {
+ list_del_rcu(&ifa->if_list);
+ in6_ifa_put(ifa);
+ }
}
write_unlock_bh(&idev->lock);
- /* now clean up addresses to be removed */
- while (!list_empty(&del_list)) {
- ifa = list_first_entry(&del_list,
- struct inet6_ifaddr, if_list);
- list_del(&ifa->if_list);
-
- in6_ifa_put(ifa);
- }
-
/* Step 5: Discard anycast and multicast list */
if (how) {
ipv6_ac_destroy_dev(idev);
@@ -3820,8 +3830,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
goto out;
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
- dev_net(dev)->ipv6.devconf_all->accept_dad < 1 ||
- idev->cnf.accept_dad < 1 ||
+ (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 &&
+ idev->cnf.accept_dad < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
bump_id = ifp->flags & IFA_F_TENTATIVE;
@@ -4092,9 +4102,9 @@ struct if6_iter_state {
static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
{
- struct inet6_ifaddr *ifa = NULL;
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ struct inet6_ifaddr *ifa = NULL;
int p = 0;
/* initial bucket if pos is 0 */
@@ -4104,7 +4114,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
}
for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
- hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket],
+ hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket],
addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
@@ -4130,7 +4140,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct if6_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) {
+ hlist_for_each_entry_continue_rcu(ifa, addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
state->offset++;
@@ -4139,7 +4149,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
while (++state->bucket < IN6_ADDR_HSIZE) {
state->offset = 0;
- hlist_for_each_entry_rcu_bh(ifa,
+ hlist_for_each_entry_rcu(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
@@ -4152,9 +4162,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
}
static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(rcu_bh)
+ __acquires(rcu)
{
- rcu_read_lock_bh();
+ rcu_read_lock();
return if6_get_first(seq, *pos);
}
@@ -4168,9 +4178,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
static void if6_seq_stop(struct seq_file *seq, void *v)
- __releases(rcu_bh)
+ __releases(rcu)
{
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
static int if6_seq_show(struct seq_file *seq, void *v)
@@ -4239,12 +4249,12 @@ void if6_proc_exit(void)
/* Check if address is a home address configured on any interface. */
int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
{
- int ret = 0;
+ unsigned int hash = inet6_addr_hash(net, addr);
struct inet6_ifaddr *ifp = NULL;
- unsigned int hash = inet6_addr_hash(addr);
+ int ret = 0;
- rcu_read_lock_bh();
- hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -4253,7 +4263,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
break;
}
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}
#endif
@@ -4343,7 +4353,7 @@ restart:
spin_lock(&ifpub->lock);
ifpub->regen_count = 0;
spin_unlock(&ifpub->lock);
- ipv6_create_tempaddr(ifpub, ifp);
+ ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
goto restart;
@@ -4579,7 +4589,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
*/
return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
ifm->ifa_prefixlen, ifa_flags,
- preferred_lft, valid_lft);
+ preferred_lft, valid_lft, extack);
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -4906,17 +4916,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
extack);
if (err < 0)
- goto errout;
+ return err;
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr) {
- err = -EINVAL;
- goto errout;
- }
+ if (!addr)
+ return -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
- dev = __dev_get_by_index(net, ifm->ifa_index);
+ dev = dev_get_by_index(net, ifm->ifa_index);
ifa = ipv6_get_ifaddr(net, addr, dev, 1);
if (!ifa) {
@@ -4942,6 +4950,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
errout_ifa:
in6_ifa_put(ifa);
errout:
+ if (dev)
+ dev_put(dev);
return err;
}
@@ -5898,10 +5908,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
spin_lock(&ifa->lock);
if (ifa->rt) {
struct rt6_info *rt = ifa->rt;
- struct fib6_table *table = rt->rt6i_table;
int cpu;
- read_lock(&table->tb6_lock);
+ rcu_read_lock();
addrconf_set_nopolicy(ifa->rt, val);
if (rt->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
@@ -5911,7 +5920,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
addrconf_set_nopolicy(*rtp, val);
}
}
- read_unlock(&table->tb6_lock);
+ rcu_read_unlock();
}
spin_unlock(&ifa->lock);
}
@@ -6585,13 +6594,13 @@ int __init addrconf_init(void)
__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, 0);
+ inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
inet6_dump_ifmcaddr, 0);
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
inet6_dump_ifacaddr, 0);
__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, 0);
+ inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
ipv6_addr_label_rtnl_register();
@@ -6618,9 +6627,9 @@ void addrconf_cleanup(void)
unregister_pernet_subsys(&addrconf_ops);
ipv6_addr_label_cleanup();
- rtnl_lock();
+ rtnl_af_unregister(&inet6_ops);
- __rtnl_af_unregister(&inet6_ops);
+ rtnl_lock();
/* clean dev list */
for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 9e3488d..32b564d 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
EXPORT_SYMBOL(__ipv6_addr_type);
static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
-static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain);
+static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain);
int register_inet6addr_notifier(struct notifier_block *nb)
{
@@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain);
int register_inet6addr_validator_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&inet6addr_validator_chain, nb);
+ return blocking_notifier_chain_register(&inet6addr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inet6addr_validator_notifier);
int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb);
+ return blocking_notifier_chain_unregister(&inet6addr_validator_chain,
+ nb);
}
EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
{
- return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v);
+ return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v);
}
EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index c6311d7..00e1f8e 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IPv6 Address Label subsystem
* for the IPv6 "Default" Source Address Selection
@@ -18,7 +19,6 @@
#include <linux/if_addrlabel.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
-#include <linux/refcount.h>
#if 0
#define ADDRLABEL(x...) printk(x)
@@ -36,7 +36,6 @@ struct ip6addrlbl_entry {
int addrtype;
u32 label;
struct hlist_node list;
- refcount_t refcnt;
struct rcu_head rcu;
};
@@ -111,28 +110,6 @@ static const __net_initconst struct ip6addrlbl_init_table
}
};
-/* Object management */
-static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
-{
- kfree(p);
-}
-
-static void ip6addrlbl_free_rcu(struct rcu_head *h)
-{
- ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
-}
-
-static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
-{
- return refcount_inc_not_zero(&p->refcnt);
-}
-
-static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
-{
- if (refcount_dec_and_test(&p->refcnt))
- call_rcu(&p->rcu, ip6addrlbl_free_rcu);
-}
-
/* Find label */
static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
const struct in6_addr *addr,
@@ -219,7 +196,6 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
newp->addrtype = addrtype;
newp->label = label;
INIT_HLIST_NODE(&newp->list);
- refcount_set(&newp->refcnt, 1);
return newp;
}
@@ -243,7 +219,7 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
goto out;
}
hlist_replace_rcu(&p->list, &newp->list);
- ip6addrlbl_put(p);
+ kfree_rcu(p, rcu);
goto out;
} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
(p->prefixlen < newp->prefixlen)) {
@@ -281,7 +257,7 @@ static int ip6addrlbl_add(struct net *net,
ret = __ip6addrlbl_add(net, newp, replace);
spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
if (ret)
- ip6addrlbl_free(newp);
+ kfree(newp);
return ret;
}
@@ -302,7 +278,7 @@ static int __ip6addrlbl_del(struct net *net,
p->ifindex == ifindex &&
ipv6_addr_equal(&p->prefix, prefix)) {
hlist_del_rcu(&p->list);
- ip6addrlbl_put(p);
+ kfree_rcu(p, rcu);
ret = 0;
break;
}
@@ -360,7 +336,7 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
spin_lock(&net->ipv6.ip6addrlbl_table.lock);
hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
hlist_del_rcu(&p->list);
- ip6addrlbl_put(p);
+ kfree_rcu(p, rcu);
}
spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
}
@@ -546,38 +522,28 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return -EINVAL;
addr = nla_data(tb[IFAL_ADDRESS]);
- rcu_read_lock();
- p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && !ip6addrlbl_hold(p))
- p = NULL;
- lseq = net->ipv6.ip6addrlbl_table.seq;
- rcu_read_unlock();
-
- if (!p) {
- err = -ESRCH;
- goto out;
- }
-
skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
- if (!skb) {
- ip6addrlbl_put(p);
+ if (!skb)
return -ENOBUFS;
- }
- err = ip6addrlbl_fill(skb, p, lseq,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- RTM_NEWADDRLABEL, 0);
+ err = -ESRCH;
- ip6addrlbl_put(p);
+ rcu_read_lock();
+ p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
+ lseq = net->ipv6.ip6addrlbl_table.seq;
+ if (p)
+ err = ip6addrlbl_fill(skb, p, lseq,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL, 0);
+ rcu_read_unlock();
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
- goto out;
+ } else {
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
}
-
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
-out:
return err;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fe5262f..c26f712 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -810,6 +810,10 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ;
net->ipv6.sysctl.flowlabel_state_ranges = 0;
+ net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT;
+ net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT;
+ net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN;
+ net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7802b72..37bb33f 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
case NEXTHDR_DEST:
if (dir == XFRM_POLICY_OUT)
ipv6_rearrange_destopt(iph, exthdr.opth);
+ /* fall through */
case NEXTHDR_HOP:
if (!zero_out_mutable_opts(exthdr.opth)) {
net_dbg_ratelimited("overrun %sopts\n",
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 89910e2..4000b71 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -483,8 +483,8 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
goto out;
}
- if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2))
- BUG();
+ ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2);
+ BUG_ON(ret);
ret = -EINVAL;
padlen = nexthdr[0];
@@ -559,14 +559,14 @@ static void esp_input_restore_header(struct sk_buff *skb)
static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
{
struct xfrm_state *x = xfrm_input_state(skb);
- struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
/* For ESN we move the header forward by 4 bytes to
* accomodate the high bits. We will move it back after
* decryption.
*/
if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = skb_push(skb, 4);
+ struct ip_esp_hdr *esph = skb_push(skb, 4);
+
*seqhi = esph->spi;
esph->spi = esph->seq_no;
esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 9551613..83bd757 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -74,8 +74,20 @@ struct tlvtype_proc {
/* An unknown option is detected, decide what to do */
-static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff,
+ bool disallow_unknowns)
{
+ if (disallow_unknowns) {
+ /* If unknown TLVs are disallowed by configuration
+ * then always silently drop packet. Note this also
+ * means no ICMP parameter problem is sent which
+ * could be a good property to mitigate a reflection DOS
+ * attack.
+ */
+
+ goto drop;
+ }
+
switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
return true;
@@ -89,25 +101,36 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
*/
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
break;
+ /* fall through */
case 2: /* send ICMP PARM PROB regardless and drop packet */
icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
return false;
}
+drop:
kfree_skb(skb);
return false;
}
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
+static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+ struct sk_buff *skb,
+ int max_count)
{
- const struct tlvtype_proc *curr;
+ int len = (skb_transport_header(skb)[1] + 1) << 3;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
- int len = (skb_transport_header(skb)[1] + 1) << 3;
+ const struct tlvtype_proc *curr;
+ bool disallow_unknowns = false;
+ int tlv_count = 0;
int padlen = 0;
+ if (unlikely(max_count < 0)) {
+ disallow_unknowns = true;
+ max_count = -max_count;
+ }
+
if (skb_transport_offset(skb) + len > skb_headlen(skb))
goto bad;
@@ -148,6 +171,11 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
default: /* Other TLV code so scan list */
if (optlen > len)
goto bad;
+
+ tlv_count++;
+ if (tlv_count > max_count)
+ goto bad;
+
for (curr = procs; curr->type >= 0; curr++) {
if (curr->type == nh[off]) {
/* type specific length/alignment
@@ -158,10 +186,10 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
break;
}
}
- if (curr->type < 0) {
- if (ip6_tlvopt_unknown(skb, off) == 0)
- return false;
- }
+ if (curr->type < 0 &&
+ !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
+ return false;
+
padlen = 0;
break;
}
@@ -186,7 +214,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- struct in6_addr tmp_addr;
int ret;
if (opt->dsthao) {
@@ -228,9 +255,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->ip_summed = CHECKSUM_NONE;
- tmp_addr = ipv6h->saddr;
- ipv6h->saddr = hao->addr;
- hao->addr = tmp_addr;
+ swap(ipv6h->saddr, hao->addr);
if (skb->tstamp == 0)
__net_timestamp(skb);
@@ -260,23 +285,31 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
__u16 dstbuf;
#endif
struct dst_entry *dst = skb_dst(skb);
+ struct net *net = dev_net(skb->dev);
+ int extlen;
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
__IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
IPSTATS_MIB_INHDRERRORS);
+fail_and_free:
kfree_skb(skb);
return -1;
}
+ extlen = (skb_transport_header(skb)[1] + 1) << 3;
+ if (extlen > net->ipv6.sysctl.max_dst_opts_len)
+ goto fail_and_free;
+
opt->lastopt = opt->dst1 = skb_network_header_len(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
dstbuf = opt->dst1;
#endif
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
- skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
+ init_net.ipv6.sysctl.max_dst_opts_cnt)) {
+ skb->transport_header += extlen;
opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
opt->nhoff = dstbuf;
@@ -805,6 +838,8 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = {
int ipv6_parse_hopopts(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
+ struct net *net = dev_net(skb->dev);
+ int extlen;
/*
* skb_network_header(skb) is equal to skb->data, and
@@ -815,13 +850,19 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
!pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
((skb_transport_header(skb)[1] + 1) << 3)))) {
+fail_and_free:
kfree_skb(skb);
return -1;
}
+ extlen = (skb_transport_header(skb)[1] + 1) << 3;
+ if (extlen > net->ipv6.sysctl.max_hbh_opts_len)
+ goto fail_and_free;
+
opt->flags |= IP6SKB_HOPBYHOP;
- if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
- skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ if (ip6_parse_tlv(tlvprochopopt_lst, skb,
+ init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
+ skb->transport_header += extlen;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
return 1;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 115d609..11025f8 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
{
unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- unsigned int len;
bool found;
if (fragoff)
@@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
start = *offset + sizeof(struct ipv6hdr);
nexthdr = ip6->nexthdr;
}
- len = skb->len - start;
do {
struct ipv6_opt_hdr _hdr, *hp;
@@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
if (!found) {
nexthdr = hp->nexthdr;
- len -= hdrlen;
start += hdrlen;
}
} while (!found);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5acb544..6ae5dd3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -250,16 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
return (*op & 0xC0) == 0x80;
}
-int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
- struct icmp6hdr *thdr, int len)
+void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
- int err = 0;
skb = skb_peek(&sk->sk_write_queue);
if (!skb)
- goto out;
+ return;
icmp6h = icmp6_hdr(skb);
memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
@@ -287,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
tmp_csum);
}
ip6_push_pending_frames(sk);
-out:
- return err;
}
struct icmpv6_msg {
@@ -438,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
int iif = 0;
int addr_type = 0;
int len;
- int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
@@ -575,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- err = ip6_append_data(sk, icmpv6_getfrag, &msg,
- len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, &sockc_unused);
- if (err) {
+ if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+ len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ MSG_DONTWAIT, &sockc_unused)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
- len + sizeof(struct icmp6hdr));
+ icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ len + sizeof(struct icmp6hdr));
}
rcu_read_unlock();
out_dst_release:
@@ -682,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct icmpv6_msg msg;
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
- int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
struct sockcm_cookie sockc_unused = {0};
@@ -719,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- err = ip6_dst_lookup(net, sk, &dst, &fl6);
- if (err)
+ if (ip6_dst_lookup(net, sk, &dst, &fl6))
goto out;
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
@@ -737,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ipc6.dontfrag = np->dontfrag;
ipc6.opt = NULL;
- err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), &ipc6, &fl6,
- (struct rt6_info *)dst, MSG_DONTWAIT,
- &sockc_unused);
-
- if (err) {
+ if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+ skb->len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr), &ipc6, &fl6,
+ (struct rt6_info *)dst, MSG_DONTWAIT,
+ &sockc_unused)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
- skb->len + sizeof(struct icmp6hdr));
+ icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ skb->len + sizeof(struct icmp6hdr));
}
dst_release(dst);
out:
@@ -872,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
goto discard_it;
hdr = icmp6_hdr(skb);
- /*
- * Drop through to notify
- */
-
+ /* to notify */
+ /* fall through */
case ICMPV6_DEST_UNREACH:
case ICMPV6_TIME_EXCEED:
case ICMPV6_PARAMPROB:
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index e0170f6..3c7a11b 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -55,17 +55,6 @@ struct ila_identifier {
};
};
-enum {
- ILA_ATYPE_IID = 0,
- ILA_ATYPE_LUID,
- ILA_ATYPE_VIRT_V4,
- ILA_ATYPE_VIRT_UNI_V6,
- ILA_ATYPE_VIRT_MULTI_V6,
- ILA_ATYPE_RSVD_1,
- ILA_ATYPE_RSVD_2,
- ILA_ATYPE_RSVD_3,
-};
-
#define CSUM_NEUTRAL_FLAG htonl(0x10000000)
struct ila_addr {
@@ -93,6 +82,7 @@ struct ila_params {
struct ila_locator locator_match;
__wsum csum_diff;
u8 csum_mode;
+ u8 ident_type;
};
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index aba0998..8c88ecf 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -13,30 +13,37 @@
#include <uapi/linux/ila.h>
#include "ila.h"
-static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+void ila_init_saved_csum(struct ila_params *p)
{
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+ if (!p->locator_match.v64)
+ return;
+ p->csum_diff = compute_csum_diff8(
+ (__be32 *)&p->locator,
+ (__be32 *)&p->locator_match);
+}
+
+static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p)
+{
if (p->locator_match.v64)
return p->csum_diff;
else
- return compute_csum_diff8((__be32 *)&iaddr->loc,
- (__be32 *)&p->locator);
+ return compute_csum_diff8((__be32 *)&p->locator,
+ (__be32 *)&iaddr->loc);
}
-static void ila_csum_do_neutral(struct ila_addr *iaddr,
- struct ila_params *p)
+static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+ return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p);
+}
+
+static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr,
+ struct ila_params *p)
{
__sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff, fval;
- /* Check if checksum adjust value has been cached */
- if (p->locator_match.v64) {
- diff = p->csum_diff;
- } else {
- diff = compute_csum_diff8((__be32 *)&p->locator,
- (__be32 *)iaddr);
- }
+ diff = get_csum_diff_iaddr(iaddr, p);
fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ?
CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG);
@@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr,
iaddr->ident.csum_neutral ^= 1;
}
-static void ila_csum_adjust_transport(struct sk_buff *skb,
+static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr,
struct ila_params *p)
{
+ __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3];
__wsum diff;
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
- struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
+
+ diff = get_csum_diff_iaddr(iaddr, p);
+
+ *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust)));
+}
+
+static void ila_csum_adjust_transport(struct sk_buff *skb,
+ struct ila_params *p)
+{
size_t nhoff = sizeof(struct ipv6hdr);
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ __wsum diff;
switch (ip6h->nexthdr) {
case NEXTHDR_TCP:
@@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb,
}
break;
}
-
- /* Now change destination address */
- iaddr->loc = p->locator;
}
void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
- bool set_csum_neutral)
+ bool sir2ila)
{
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ila_addr *iaddr = ila_a2i(&ip6h->daddr);
- /* First deal with the transport checksum */
- if (ila_csum_neutral_set(iaddr->ident)) {
- /* C-bit is set in the locator indicating that this
- * is a locator being translated to a SIR address.
- * Perform (receiver) checksum-neutral translation.
- */
- if (!set_csum_neutral)
- ila_csum_do_neutral(iaddr, p);
- } else {
- switch (p->csum_mode) {
- case ILA_CSUM_ADJUST_TRANSPORT:
- ila_csum_adjust_transport(skb, p);
- break;
- case ILA_CSUM_NEUTRAL_MAP:
- ila_csum_do_neutral(iaddr, p);
- break;
- case ILA_CSUM_NO_ACTION:
+ switch (p->csum_mode) {
+ case ILA_CSUM_ADJUST_TRANSPORT:
+ ila_csum_adjust_transport(skb, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP:
+ if (sir2ila) {
+ if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) {
+ /* Checksum flag should never be
+ * set in a formatted SIR address.
+ */
+ break;
+ }
+ } else if (!ila_csum_neutral_set(iaddr->ident)) {
+ /* ILA to SIR translation and C-bit isn't
+ * set so we're good.
+ */
break;
}
+ ila_csum_do_neutral_fmt(iaddr, p);
+ break;
+ case ILA_CSUM_NEUTRAL_MAP_AUTO:
+ ila_csum_do_neutral_nofmt(iaddr, p);
+ break;
+ case ILA_CSUM_NO_ACTION:
+ break;
}
/* Now change destination address */
iaddr->loc = p->locator;
}
-void ila_init_saved_csum(struct ila_params *p)
-{
- if (!p->locator_match.v64)
- return;
-
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator,
- (__be32 *)&p->locator_match);
-}
-
static int __init ila_init(void)
{
int ret;
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 0c02a09..3d56a2f 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/errno.h>
#include <linux/ip.h>
#include <linux/kernel.h>
@@ -19,6 +20,7 @@ struct ila_lwt {
struct ila_params p;
struct dst_cache dst_cache;
u32 connected : 1;
+ u32 lwt_output : 1;
};
static inline struct ila_lwt *ila_lwt_lwtunnel(
@@ -44,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate),
- true);
+ if (ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(orig_dst->lwtstate),
+ true);
if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) {
/* Already have a next hop address in route, no need for
@@ -97,11 +101,15 @@ drop:
static int ila_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate);
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false);
+ if (!ilwt->lwt_output)
+ ila_update_ipv6_locator(skb,
+ ila_params_lwtunnel(dst->lwtstate),
+ false);
return dst->lwtstate->orig_input(skb);
@@ -113,6 +121,8 @@ drop:
static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
+ [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, },
};
static int ila_build_state(struct nlattr *nla,
@@ -126,33 +136,84 @@ static int ila_build_state(struct nlattr *nla,
struct lwtunnel_state *newts;
const struct fib6_config *cfg6 = cfg;
struct ila_addr *iaddr;
+ u8 ident_type = ILA_ATYPE_USE_FORMAT;
+ u8 hook_type = ILA_HOOK_ROUTE_OUTPUT;
+ u8 csum_mode = ILA_CSUM_NO_ACTION;
+ bool lwt_output = true;
+ u8 eff_ident_type;
int ret;
if (family != AF_INET6)
return -EINVAL;
- if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
- /* Need to have full locator and at least type field
- * included in destination
- */
+ ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ if (!tb[ILA_ATTR_LOCATOR])
return -EINVAL;
- }
iaddr = (struct ila_addr *)&cfg6->fc_dst;
- if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) {
- /* Don't allow translation for a non-ILA address or checksum
- * neutral flag to be set.
+ if (tb[ILA_ATTR_IDENT_TYPE])
+ ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]);
+
+ if (ident_type == ILA_ATYPE_USE_FORMAT) {
+ /* Infer identifier type from type field in formatted
+ * identifier.
*/
+
+ if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) {
+ /* Need to have full locator and at least type field
+ * included in destination
+ */
+ return -EINVAL;
+ }
+
+ eff_ident_type = iaddr->ident.type;
+ } else {
+ eff_ident_type = ident_type;
+ }
+
+ switch (eff_ident_type) {
+ case ILA_ATYPE_IID:
+ /* Don't allow ILA for IID type */
+ return -EINVAL;
+ case ILA_ATYPE_LUID:
+ break;
+ case ILA_ATYPE_VIRT_V4:
+ case ILA_ATYPE_VIRT_UNI_V6:
+ case ILA_ATYPE_VIRT_MULTI_V6:
+ case ILA_ATYPE_NONLOCAL_ADDR:
+ /* These ILA formats are not supported yet. */
+ default:
return -EINVAL;
}
- ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack);
- if (ret < 0)
- return ret;
+ if (tb[ILA_ATTR_HOOK_TYPE])
+ hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]);
+
+ switch (hook_type) {
+ case ILA_HOOK_ROUTE_OUTPUT:
+ lwt_output = true;
+ break;
+ case ILA_HOOK_ROUTE_INPUT:
+ lwt_output = false;
+ break;
+ default:
+ return -EINVAL;
+ }
- if (!tb[ILA_ATTR_LOCATOR])
+ if (tb[ILA_ATTR_CSUM_MODE])
+ csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
+
+ if (csum_mode == ILA_CSUM_NEUTRAL_MAP &&
+ ila_csum_neutral_set(iaddr->ident)) {
+ /* Don't allow translation if checksum neutral bit is
+ * configured and it's set in the SIR address.
+ */
return -EINVAL;
+ }
newts = lwtunnel_state_alloc(sizeof(*ilwt));
if (!newts)
@@ -165,19 +226,18 @@ static int ila_build_state(struct nlattr *nla,
return ret;
}
+ ilwt->lwt_output = !!lwt_output;
+
p = ila_params_lwtunnel(newts);
+ p->csum_mode = csum_mode;
+ p->ident_type = ident_type;
p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]);
/* Precompute checksum difference for translation since we
* know both the old locator and the new one.
*/
p->locator_match = iaddr->loc;
- p->csum_diff = compute_csum_diff8(
- (__be32 *)&p->locator_match, (__be32 *)&p->locator);
-
- if (tb[ILA_ATTR_CSUM_MODE])
- p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]);
ila_init_saved_csum(p);
@@ -202,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb,
struct lwtunnel_state *lwtstate)
{
struct ila_params *p = ila_params_lwtunnel(lwtstate);
+ struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate);
if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64,
ILA_ATTR_PAD))
goto nla_put_failure;
+
if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode))
goto nla_put_failure;
+ if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE,
+ ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT :
+ ILA_HOOK_ROUTE_INPUT))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -219,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
{
return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */
nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */
+ nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */
0;
}
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 5bd419c..6eb5e68 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/jhash.h>
#include <linux/netfilter.h>
#include <linux/rcupdate.h>
@@ -120,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
+ [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, },
};
static int parse_nl_config(struct genl_info *info,
@@ -137,6 +139,14 @@ static int parse_nl_config(struct genl_info *info,
if (info->attrs[ILA_ATTR_CSUM_MODE])
xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
+ else
+ xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+
+ if (info->attrs[ILA_ATTR_IDENT_TYPE])
+ xp->ip.ident_type = nla_get_u8(
+ info->attrs[ILA_ATTR_IDENT_TYPE]);
+ else
+ xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -197,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg)
}
}
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral);
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila);
static unsigned int
ila_nf_input(void *priv,
@@ -395,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg)
(__force u64)ila->xp.ip.locator_match.v64,
ILA_ATTR_PAD) ||
nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) ||
- nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode))
+ nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) ||
+ nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type))
return -1;
return 0;
@@ -606,7 +617,7 @@ static struct pernet_operations ila_net_ops = {
.size = sizeof(struct ila_net),
};
-static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
+static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
{
struct ila_map *ila;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -616,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
/* Assumes skb contains a valid IPv6 header that is pulled */
- if (!ila_addr_is_ila(iaddr)) {
- /* Type indicates this is not an ILA address */
- return 0;
- }
+ /* No check here that ILA type in the mapping matches what is in the
+ * address. We assume that whatever sender gaves us can be translated.
+ * The checksum mode however is relevant.
+ */
rcu_read_lock();
ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan);
if (ila)
- ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral);
+ ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila);
rcu_read_unlock();
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index c0cbcb2..ec43d18 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <net/ip.h>
#include <net/udp.h>
#include <net/udplite.h>
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e5308d7..2e2804f 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -38,14 +38,6 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
static struct kmem_cache *fib6_node_kmem __read_mostly;
struct fib6_cleaner {
@@ -62,9 +54,12 @@ struct fib6_cleaner {
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
-static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
static int fib6_walk(struct net *net, struct fib6_walker *w);
static int fib6_walk_continue(struct fib6_walker *w);
@@ -110,6 +105,20 @@ enum {
FIB6_NO_SERNUM_CHANGE = 0,
};
+void fib6_update_sernum(struct rt6_info *rt)
+{
+ struct fib6_table *table = rt->rt6i_table;
+ struct net *net = dev_net(rt->dst.dev);
+ struct fib6_node *fn;
+
+ spin_lock_bh(&table->tb6_lock);
+ fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&table->tb6_lock));
+ if (fn)
+ fn->fn_sernum = fib6_new_sernum(net);
+ spin_unlock_bh(&table->tb6_lock);
+}
+
/*
* Auxiliary address test functions for the radix tree.
*
@@ -140,18 +149,21 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static struct fib6_node *node_alloc(void)
+static struct fib6_node *node_alloc(struct net *net)
{
struct fib6_node *fn;
fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
+ if (fn)
+ net->ipv6.rt6_stats->fib_nodes++;
return fn;
}
-static void node_free_immediate(struct fib6_node *fn)
+static void node_free_immediate(struct net *net, struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
+ net->ipv6.rt6_stats->fib_nodes--;
}
static void node_free_rcu(struct rcu_head *head)
@@ -161,9 +173,10 @@ static void node_free_rcu(struct rcu_head *head)
kmem_cache_free(fib6_node_kmem, fn);
}
-static void node_free(struct fib6_node *fn)
+static void node_free(struct net *net, struct fib6_node *fn)
{
call_rcu(&fn->rcu, node_free_rcu);
+ net->ipv6.rt6_stats->fib_nodes--;
}
void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
@@ -185,9 +198,6 @@ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
*ppcpu_rt = NULL;
}
}
-
- free_percpu(non_pcpu_rt->rt6i_pcpu);
- non_pcpu_rt->rt6i_pcpu = NULL;
}
EXPORT_SYMBOL_GPL(rt6_free_pcpu);
@@ -205,8 +215,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
* Initialize table lock at a single place to give lockdep a key,
* tables aren't visible prior to being linked to the list.
*/
- rwlock_init(&tb->tb6_lock);
-
+ spin_lock_init(&tb->tb6_lock);
h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
/*
@@ -225,7 +234,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (table) {
table->tb6_id = id;
- table->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(table->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
}
@@ -322,11 +332,8 @@ unsigned int fib6_tables_seq_read(struct net *net)
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
- read_lock_bh(&tb->tb6_lock);
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
fib_seq += tb->fib_seq;
- read_unlock_bh(&tb->tb6_lock);
- }
}
rcu_read_unlock();
@@ -346,9 +353,11 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
static int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
- struct rt6_info *rt)
+ struct rt6_info *rt,
+ struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
.rt = rt,
};
@@ -372,7 +381,7 @@ static int fib6_node_dump(struct fib6_walker *w)
{
struct rt6_info *rt;
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+ for_each_fib6_walker_rt(w)
fib6_rt_dump(rt, w->args);
w->leaf = NULL;
return 0;
@@ -382,9 +391,9 @@ static void fib6_table_dump(struct net *net, struct fib6_table *tb,
struct fib6_walker *w)
{
w->root = &tb->tb6_root;
- read_lock_bh(&tb->tb6_lock);
+ spin_lock_bh(&tb->tb6_lock);
fib6_walk(net, w);
- read_unlock_bh(&tb->tb6_lock);
+ spin_unlock_bh(&tb->tb6_lock);
}
/* Called with rcu_read_lock() */
@@ -421,7 +430,7 @@ static int fib6_dump_node(struct fib6_walker *w)
int res;
struct rt6_info *rt;
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = rt6_dump_route(rt, w->args);
if (res < 0) {
/* Frame is full, suspend walking */
@@ -480,9 +489,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->count = 0;
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk(net, w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
cb->args[5] = w->root->fn_sernum;
@@ -497,9 +506,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
} else
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk_continue(w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res <= 0) {
fib6_walker_unlink(net, w);
cb->args[4] = 0;
@@ -580,11 +589,13 @@ out:
* node.
*/
-static struct fib6_node *fib6_add_1(struct fib6_node *root,
- struct in6_addr *addr, int plen,
- int offset, int allow_create,
- int replace_required, int sernum,
- struct netlink_ext_ack *extack)
+static struct fib6_node *fib6_add_1(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *root,
+ struct in6_addr *addr, int plen,
+ int offset, int allow_create,
+ int replace_required,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -599,7 +610,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn = root;
do {
- key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
@@ -625,12 +638,10 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
if (plen == fn->fn_bit) {
/* clean up an intermediate node */
if (!(fn->fn_flags & RTN_RTINFO)) {
- rt6_release(fn->leaf);
- fn->leaf = NULL;
+ RCU_INIT_POINTER(fn->leaf, NULL);
+ rt6_release(leaf);
}
- fn->fn_sernum = sernum;
-
return fn;
}
@@ -639,10 +650,13 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
*/
/* Try to walk down on tree. */
- fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right : fn->left;
+ fn = dir ?
+ rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock)) :
+ rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
} while (fn);
if (!allow_create) {
@@ -668,19 +682,17 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
* Create new leaf node without children.
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
-
- ln->parent = pn;
- ln->fn_sernum = sernum;
+ RCU_INIT_POINTER(ln->parent, pn);
if (dir)
- pn->right = ln;
+ rcu_assign_pointer(pn->right, ln);
else
- pn->left = ln;
+ rcu_assign_pointer(pn->left, ln);
return ln;
@@ -694,7 +706,8 @@ insert_above:
* and the current
*/
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
/* find 1st bit in difference between the 2 addrs.
@@ -710,14 +723,14 @@ insert_above:
* (new leaf node)[ln] (old node)[fn]
*/
if (plen > bit) {
- in = node_alloc();
- ln = node_alloc();
+ in = node_alloc(net);
+ ln = node_alloc(net);
if (!in || !ln) {
if (in)
- node_free_immediate(in);
+ node_free_immediate(net, in);
if (ln)
- node_free_immediate(ln);
+ node_free_immediate(net, ln);
return ERR_PTR(-ENOMEM);
}
@@ -731,31 +744,28 @@ insert_above:
in->fn_bit = bit;
- in->parent = pn;
+ RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
- atomic_inc(&in->leaf->rt6i_ref);
-
- in->fn_sernum = sernum;
+ atomic_inc(&rcu_dereference_protected(in->leaf,
+ lockdep_is_held(&table->tb6_lock))->rt6i_ref);
/* update parent pointer */
if (dir)
- pn->right = in;
+ rcu_assign_pointer(pn->right, in);
else
- pn->left = in;
+ rcu_assign_pointer(pn->left, in);
ln->fn_bit = plen;
- ln->parent = in;
- fn->parent = in;
-
- ln->fn_sernum = sernum;
+ RCU_INIT_POINTER(ln->parent, in);
+ rcu_assign_pointer(fn->parent, in);
if (addr_bit_set(addr, bit)) {
- in->right = ln;
- in->left = fn;
+ rcu_assign_pointer(in->right, ln);
+ rcu_assign_pointer(in->left, fn);
} else {
- in->left = ln;
- in->right = fn;
+ rcu_assign_pointer(in->left, ln);
+ rcu_assign_pointer(in->right, fn);
}
} else { /* plen <= bit */
@@ -765,28 +775,26 @@ insert_above:
* (old node)[fn] NULL
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
- ln->parent = pn;
-
- ln->fn_sernum = sernum;
-
- if (dir)
- pn->right = ln;
- else
- pn->left = ln;
+ RCU_INIT_POINTER(ln->parent, pn);
if (addr_bit_set(&key->addr, plen))
- ln->right = fn;
+ RCU_INIT_POINTER(ln->right, fn);
else
- ln->left = fn;
+ RCU_INIT_POINTER(ln->left, fn);
+
+ rcu_assign_pointer(fn->parent, ln);
- fn->parent = ln;
+ if (dir)
+ rcu_assign_pointer(pn->right, ln);
+ else
+ rcu_assign_pointer(pn->left, ln);
}
return ln;
}
@@ -832,6 +840,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
struct net *net)
{
+ struct fib6_table *table = rt->rt6i_table;
+
if (atomic_read(&rt->rt6i_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
@@ -840,12 +850,17 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
* to still alive ones.
*/
while (fn) {
- if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
- fn->leaf = fib6_find_prefix(net, fn);
- atomic_inc(&fn->leaf->rt6i_ref);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_leaf;
+ if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
+ new_leaf = fib6_find_prefix(net, table, fn);
+ atomic_inc(&new_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_leaf);
rt6_release(rt);
}
- fn = fn->parent;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
}
}
}
@@ -855,11 +870,14 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
- struct nl_info *info, struct mx6_config *mxc)
+ struct nl_info *info, struct mx6_config *mxc,
+ struct netlink_ext_ack *extack)
{
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
struct rt6_info *iter = NULL;
- struct rt6_info **ins;
- struct rt6_info **fallback_ins = NULL;
+ struct rt6_info __rcu **ins;
+ struct rt6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
int add = (!info->nlh ||
@@ -874,7 +892,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &fn->leaf;
- for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
+ for (iter = leaf; iter;
+ iter = rcu_dereference_protected(iter->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
/*
* Search for duplicates
*/
@@ -936,7 +956,8 @@ next_iter:
if (fallback_ins && !found) {
/* No ECMP-able route found, replace first non-ECMP one */
ins = fallback_ins;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
found++;
}
@@ -950,7 +971,7 @@ next_iter:
struct rt6_info *sibling, *temp_sibling;
/* Find the first route that have the same metric */
- sibling = fn->leaf;
+ sibling = leaf;
while (sibling) {
if (sibling->rt6i_metric == rt->rt6i_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -958,7 +979,8 @@ next_iter:
&sibling->rt6i_siblings);
break;
}
- sibling = sibling->dst.rt6_next;
+ sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
@@ -987,12 +1009,12 @@ add:
if (err)
return err;
- rt->dst.rt6_next = iter;
- *ins = rt;
- rcu_assign_pointer(rt->rt6i_node, fn);
+ rcu_assign_pointer(rt->dst.rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(rt->rt6i_node, fn);
+ rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
- rt);
+ rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1016,12 +1038,12 @@ add:
if (err)
return err;
- *ins = rt;
+ atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->dst.rt6_next = iter->dst.rt6_next;
- atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
- rt);
+ rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
@@ -1031,14 +1053,15 @@ add:
nsiblings = iter->rt6i_nsiblings;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
- if (fn->rr_ptr == iter)
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
rt6_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
ins = &rt->dst.rt6_next;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
while (iter) {
if (iter->rt6i_metric > rt->rt6i_metric)
break;
@@ -1046,14 +1069,16 @@ add:
*ins = iter->dst.rt6_next;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
- if (fn->rr_ptr == iter)
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
rt6_release(iter);
nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
ins = &iter->dst.rt6_next;
}
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
WARN_ON(nsiblings != 0);
}
@@ -1077,16 +1102,33 @@ void fib6_force_start_gc(struct net *net)
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
+static void fib6_update_sernum_upto_root(struct rt6_info *rt,
+ int sernum)
+{
+ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+
+ /* paired with smp_rmb() in rt6_get_cookie_safe() */
+ smp_wmb();
+ while (fn) {
+ fn->fn_sernum = sernum;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ }
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
* with source addr info in sub-trees
+ * Need to own table->tb6_lock
*/
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
struct nl_info *info, struct mx6_config *mxc,
struct netlink_ext_ack *extack)
{
+ struct fib6_table *table = rt->rt6i_table;
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
int allow_create = 1;
@@ -1095,6 +1137,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
return -EINVAL;
+ if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1105,9 +1149,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (!allow_create && !replace_required)
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
- fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+ fn = fib6_add_1(info->nl_net, table, root,
+ &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
- replace_required, sernum, extack);
+ replace_required, extack);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
fn = NULL;
@@ -1120,7 +1165,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
- if (!fn->subtree) {
+ if (!rcu_access_pointer(fn->subtree)) {
struct fib6_node *sfn;
/*
@@ -1134,42 +1179,40 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
*/
/* Create subtree root node */
- sfn = node_alloc();
+ sfn = node_alloc(info->nl_net);
if (!sfn)
goto failure;
- sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+ rcu_assign_pointer(sfn->leaf,
+ info->nl_net->ipv6.ip6_null_entry);
sfn->fn_flags = RTN_ROOT;
- sfn->fn_sernum = sernum;
/* Now add the first leaf node to new subtree */
- sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, sfn,
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
root, and then (in failure) stale node
in main tree.
*/
- node_free_immediate(sfn);
+ node_free_immediate(info->nl_net, sfn);
err = PTR_ERR(sn);
goto failure;
}
/* Now link new subtree to main tree */
- sfn->parent = fn;
- fn->subtree = sfn;
+ rcu_assign_pointer(sfn->parent, fn);
+ rcu_assign_pointer(fn->subtree, sfn);
} else {
- sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
@@ -1177,19 +1220,18 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
}
- if (!fn->leaf) {
- fn->leaf = rt;
+ if (!rcu_access_pointer(fn->leaf)) {
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, rt);
}
fn = sn;
}
#endif
- err = fib6_add_rt2node(fn, rt, info, mxc);
+ err = fib6_add_rt2node(fn, rt, info, mxc, extack);
if (!err) {
+ fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
- if (!(rt->rt6i_flags & RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn);
}
out:
@@ -1199,19 +1241,23 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- if (pn != fn && pn->leaf == rt) {
- pn->leaf = NULL;
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn != fn && pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
atomic_dec(&rt->rt6i_ref);
}
- if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn->leaf = fib6_find_prefix(info->nl_net, pn);
+ if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
#if RT6_DEBUG >= 2
- if (!pn->leaf) {
- WARN_ON(pn->leaf == NULL);
- pn->leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf = info->nl_net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&pn->leaf->rt6i_ref);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
}
#endif
goto failure;
@@ -1226,7 +1272,7 @@ failure:
* fn->leaf.
*/
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
- fib6_repair_tree(info->nl_net, fn);
+ fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
*/
@@ -1264,7 +1310,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
dir = addr_bit_set(args->addr, fn->fn_bit);
- next = dir ? fn->right : fn->left;
+ next = dir ? rcu_dereference(fn->right) :
+ rcu_dereference(fn->left);
if (next) {
fn = next;
@@ -1274,18 +1321,22 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
}
while (fn) {
- if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+ if (subtree || fn->fn_flags & RTN_RTINFO) {
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6key *key;
- key = (struct rt6key *) ((u8 *) fn->leaf +
- args->offset);
+ if (!leaf)
+ goto backtrack;
+
+ key = (struct rt6key *) ((u8 *)leaf + args->offset);
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree) {
+ if (subtree) {
struct fib6_node *sfn;
- sfn = fib6_lookup_1(fn->subtree,
- args + 1);
+ sfn = fib6_lookup_1(subtree, args + 1);
if (!sfn)
goto backtrack;
fn = sfn;
@@ -1295,18 +1346,18 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
return fn;
}
}
-#ifdef CONFIG_IPV6_SUBTREES
backtrack:
-#endif
if (fn->fn_flags & RTN_ROOT)
break;
- fn = fn->parent;
+ fn = rcu_dereference(fn->parent);
}
return NULL;
}
+/* called with rcu_read_lock() held
+ */
struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
@@ -1337,54 +1388,87 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
/*
* Get node with specified destination prefix (and source prefix,
* if subtrees are used)
+ * exact_match == true means we try to find fn with exact match of
+ * the passed in prefix addr
+ * exact_match == false means we try to find fn with longest prefix
+ * match of the passed in prefix addr. This is useful for finding fn
+ * for cached route as it will be stored in the exception table under
+ * the node with longest prefix length.
*/
static struct fib6_node *fib6_locate_1(struct fib6_node *root,
const struct in6_addr *addr,
- int plen, int offset)
+ int plen, int offset,
+ bool exact_match)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *prev = NULL;
for (fn = root; fn ; ) {
- struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
+ struct rt6key *key;
+
+ /* This node is being deleted */
+ if (!leaf) {
+ if (plen <= fn->fn_bit)
+ goto out;
+ else
+ goto next;
+ }
+
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
*/
if (plen < fn->fn_bit ||
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
- return NULL;
+ goto out;
if (plen == fn->fn_bit)
return fn;
+ prev = fn;
+
+next:
/*
* We have more bits to go
*/
if (addr_bit_set(addr, fn->fn_bit))
- fn = fn->right;
+ fn = rcu_dereference(fn->right);
else
- fn = fn->left;
+ fn = rcu_dereference(fn->left);
}
- return NULL;
+out:
+ if (exact_match)
+ return NULL;
+ else
+ return prev;
}
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+ const struct in6_addr *saddr, int src_len,
+ bool exact_match)
{
struct fib6_node *fn;
fn = fib6_locate_1(root, daddr, dst_len,
- offsetof(struct rt6_info, rt6i_dst));
+ offsetof(struct rt6_info, rt6i_dst),
+ exact_match);
#ifdef CONFIG_IPV6_SUBTREES
if (src_len) {
WARN_ON(saddr == NULL);
- if (fn && fn->subtree)
- fn = fib6_locate_1(fn->subtree, saddr, src_len,
- offsetof(struct rt6_info, rt6i_src));
+ if (fn) {
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+ if (subtree) {
+ fn = fib6_locate_1(subtree, saddr, src_len,
+ offsetof(struct rt6_info, rt6i_src),
+ exact_match);
+ }
+ }
}
#endif
@@ -1400,16 +1484,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
*
*/
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
+ struct fib6_node *child_left, *child_right;
+
if (fn->fn_flags & RTN_ROOT)
return net->ipv6.ip6_null_entry;
while (fn) {
- if (fn->left)
- return fn->left->leaf;
- if (fn->right)
- return fn->right->leaf;
+ child_left = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ child_right = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_left)
+ return rcu_dereference_protected(child_left->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_right)
+ return rcu_dereference_protected(child_right->leaf,
+ lockdep_is_held(&table->tb6_lock));
fn = FIB6_SUBTREE(fn);
}
@@ -1419,31 +1513,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
/*
* Called to trim the tree of intermediate nodes when possible. "fn"
* is the node we want to try and remove.
+ * Need to own table->tb6_lock
*/
static struct fib6_node *fib6_repair_tree(struct net *net,
- struct fib6_node *fn)
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
int children;
int nstate;
- struct fib6_node *child, *pn;
+ struct fib6_node *child;
struct fib6_walker *w;
int iter = 0;
for (;;) {
+ struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_fn_leaf;
+
RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
WARN_ON(fn->fn_flags & RTN_TL_ROOT);
- WARN_ON(fn->leaf);
+ WARN_ON(fn_leaf);
children = 0;
child = NULL;
- if (fn->right)
- child = fn->right, children |= 1;
- if (fn->left)
- child = fn->left, children |= 2;
+ if (fn_r)
+ child = fn_r, children |= 1;
+ if (fn_l)
+ child = fn_l, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1451,36 +1563,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
|| (children && fn->fn_flags & RTN_ROOT)
#endif
) {
- fn->leaf = fib6_find_prefix(net, fn);
+ new_fn_leaf = fib6_find_prefix(net, table, fn);
#if RT6_DEBUG >= 2
- if (!fn->leaf) {
- WARN_ON(!fn->leaf);
- fn->leaf = net->ipv6.ip6_null_entry;
+ if (!new_fn_leaf) {
+ WARN_ON(!new_fn_leaf);
+ new_fn_leaf = net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&fn->leaf->rt6i_ref);
- return fn->parent;
+ atomic_inc(&new_fn_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_fn_leaf);
+ return pn;
}
- pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
WARN_ON(!(fn->fn_flags & RTN_ROOT));
- FIB6_SUBTREE(pn) = NULL;
+ RCU_INIT_POINTER(pn->subtree, NULL);
nstate = FWS_L;
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn)
- pn->right = child;
- else if (pn->left == fn)
- pn->left = child;
+ if (pn_r == fn)
+ rcu_assign_pointer(pn->right, child);
+ else if (pn_l == fn)
+ rcu_assign_pointer(pn->left, child);
#if RT6_DEBUG >= 2
else
WARN_ON(1);
#endif
if (child)
- child->parent = pn;
+ rcu_assign_pointer(child->parent, pn);
nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
}
@@ -1489,19 +1601,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (!child) {
- if (w->root == fn) {
- w->root = w->node = NULL;
- RT6_TRACE("W %p adjusted by delroot 1\n", w);
- } else if (w->node == fn) {
+ if (w->node == fn) {
RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
} else {
- if (w->root == fn) {
- w->root = child;
- RT6_TRACE("W %p adjusted by delroot 2\n", w);
- }
if (w->node == fn) {
w->node = child;
if (children&2) {
@@ -1516,33 +1621,39 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
read_unlock(&net->ipv6.fib6_walker_lock);
- node_free(fn);
+ node_free(net, fn);
if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
- rt6_release(pn->leaf);
- pn->leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ rt6_release(pn_leaf);
fn = pn;
}
}
-static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
- struct nl_info *info)
+static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
+ struct rt6_info __rcu **rtp, struct nl_info *info)
{
struct fib6_walker *w;
- struct rt6_info *rt = *rtp;
+ struct rt6_info *rt = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
RT6_TRACE("fib6_del_route\n");
+ WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
+
/* Unlink it */
*rtp = rt->dst.rt6_next;
rt->rt6i_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
+ /* Flush all cached dst in exception table */
+ rt6_flush_exceptions(rt);
+
/* Reset round-robin state, if necessary */
- if (fn->rr_ptr == rt)
+ if (rcu_access_pointer(fn->rr_ptr) == rt)
fn->rr_ptr = NULL;
/* Remove this entry from other siblings */
@@ -1561,36 +1672,38 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
- w->leaf = rt->dst.rt6_next;
+ w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
w->state = FWS_U;
}
}
read_unlock(&net->ipv6.fib6_walker_lock);
- rt->dst.rt6_next = NULL;
-
/* If it was last route, expunge its radix tree node */
- if (!fn->leaf) {
+ if (!rcu_access_pointer(fn->leaf)) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
- fn = fib6_repair_tree(net, fn);
+ fn = fib6_repair_tree(net, table, fn);
}
fib6_purge_rt(rt, fn, net);
- call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt);
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
rt6_release(rt);
}
+/* Need to own table->tb6_lock */
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ struct fib6_table *table = rt->rt6i_table;
struct net *net = info->nl_net;
- struct rt6_info **rtp;
+ struct rt6_info __rcu **rtp;
+ struct rt6_info __rcu **rtp_next;
#if RT6_DEBUG >= 2
if (rt->dst.obsolete > 0) {
@@ -1603,28 +1716,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
WARN_ON(!(fn->fn_flags & RTN_RTINFO));
- if (!(rt->rt6i_flags & RTF_CACHE)) {
- struct fib6_node *pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
- /* clones of this route might be in another subtree */
- if (rt->rt6i_src.plen) {
- while (!(pn->fn_flags & RTN_ROOT))
- pn = pn->parent;
- pn = pn->parent;
- }
-#endif
- fib6_prune_clones(info->nl_net, pn);
- }
+ /* remove cached dst from exception table */
+ if (rt->rt6i_flags & RTF_CACHE)
+ return rt6_remove_exception_rt(rt);
/*
* Walk the leaf entries looking for ourself
*/
- for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
- if (*rtp == rt) {
- fib6_del_route(fn, rtp, info);
+ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
+ struct rt6_info *cur = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
+ if (rt == cur) {
+ fib6_del_route(table, fn, rtp, info);
return 0;
}
+ rtp_next = &cur->dst.rt6_next;
}
return -ENOENT;
}
@@ -1651,22 +1758,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
* 0 -> walk is complete.
* >0 -> walk is incomplete (i.e. suspended)
* <0 -> walk is terminated by an error.
+ *
+ * This function is called with tb6_lock held.
*/
static int fib6_walk_continue(struct fib6_walker *w)
{
- struct fib6_node *fn, *pn;
+ struct fib6_node *fn, *pn, *left, *right;
+
+ /* w->root should always be table->tb6_root */
+ WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
for (;;) {
fn = w->node;
if (!fn)
return 0;
- if (w->prune && fn != w->root &&
- fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
- w->state = FWS_C;
- w->leaf = fn->leaf;
- }
switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
case FWS_S:
@@ -1676,21 +1783,26 @@ static int fib6_walk_continue(struct fib6_walker *w)
}
w->state = FWS_L;
#endif
+ /* fall through */
case FWS_L:
- if (fn->left) {
- w->node = fn->left;
+ left = rcu_dereference_protected(fn->left, 1);
+ if (left) {
+ w->node = left;
w->state = FWS_INIT;
continue;
}
w->state = FWS_R;
+ /* fall through */
case FWS_R:
- if (fn->right) {
- w->node = fn->right;
+ right = rcu_dereference_protected(fn->right, 1);
+ if (right) {
+ w->node = right;
w->state = FWS_INIT;
continue;
}
w->state = FWS_C;
- w->leaf = fn->leaf;
+ w->leaf = rcu_dereference_protected(fn->leaf, 1);
+ /* fall through */
case FWS_C:
if (w->leaf && fn->fn_flags & RTN_RTINFO) {
int err;
@@ -1709,10 +1821,13 @@ static int fib6_walk_continue(struct fib6_walker *w)
}
skip:
w->state = FWS_U;
+ /* fall through */
case FWS_U:
if (fn == w->root)
return 0;
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent, 1);
+ left = rcu_dereference_protected(pn->left, 1);
+ right = rcu_dereference_protected(pn->right, 1);
w->node = pn;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
@@ -1721,13 +1836,13 @@ skip:
continue;
}
#endif
- if (pn->left == fn) {
+ if (left == fn) {
w->state = FWS_R;
continue;
}
- if (pn->right == fn) {
+ if (right == fn) {
w->state = FWS_C;
- w->leaf = w->node->leaf;
+ w->leaf = rcu_dereference_protected(w->node->leaf, 1);
continue;
}
#if RT6_DEBUG >= 2
@@ -1770,7 +1885,7 @@ static int fib6_clean_node(struct fib6_walker *w)
return 0;
}
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = c->func(rt, c->arg);
if (res < 0) {
w->leaf = rt;
@@ -1798,20 +1913,16 @@ static int fib6_clean_node(struct fib6_walker *w)
* func is called on each route.
* It may return -1 -> delete this route.
* 0 -> continue walking
- *
- * prune==1 -> only immediate children of node (certainly,
- * ignoring pure split nodes) will be scanned.
*/
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg),
- bool prune, int sernum, void *arg)
+ int sernum, void *arg)
{
struct fib6_cleaner c;
c.w.root = root;
c.w.func = fib6_clean_node;
- c.w.prune = prune;
c.w.count = 0;
c.w.skip = 0;
c.func = func;
@@ -1834,10 +1945,10 @@ static void __fib6_clean_all(struct net *net,
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, false, sernum, arg);
- write_unlock_bh(&table->tb6_lock);
+ func, sernum, arg);
+ spin_unlock_bh(&table->tb6_lock);
}
}
rcu_read_unlock();
@@ -1849,22 +1960,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
}
-static int fib6_prune_clone(struct rt6_info *rt, void *arg)
-{
- if (rt->rt6i_flags & RTF_CACHE) {
- RT6_TRACE("pruning clone %p\n", rt);
- return -1;
- }
-
- return 0;
-}
-
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
-{
- fib6_clean_tree(net, fn, fib6_prune_clone, true,
- FIB6_NO_SERNUM_CHANGE, NULL);
-}
-
static void fib6_flush_trees(struct net *net)
{
int new_sernum = fib6_new_sernum(net);
@@ -1876,12 +1971,6 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-struct fib6_gc_args
-{
- int timeout;
- int more;
-};
-
static int fib6_age(struct rt6_info *rt, void *arg)
{
struct fib6_gc_args *gc_args = arg;
@@ -1890,9 +1979,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
/*
* check addrconf expiration here.
* Routes are expired even if they are in use.
- *
- * Also age clones. Note, that clones are aged out
- * only if they are not in use now.
*/
if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
@@ -1901,31 +1987,14 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1;
}
gc_args->more++;
- } else if (rt->rt6i_flags & RTF_CACHE) {
- if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
- rt->dst.obsolete = DST_OBSOLETE_KILL;
- if (atomic_read(&rt->dst.__refcnt) == 1 &&
- rt->dst.obsolete == DST_OBSOLETE_KILL) {
- RT6_TRACE("aging clone %p\n", rt);
- return -1;
- } else if (rt->rt6i_flags & RTF_GATEWAY) {
- struct neighbour *neigh;
- __u8 neigh_flags = 0;
-
- neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
- if (neigh) {
- neigh_flags = neigh->flags;
- neigh_release(neigh);
- }
- if (!(neigh_flags & NTF_ROUTER)) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
- return -1;
- }
- }
- gc_args->more++;
}
+ /* Also age clones in the exception table.
+ * Note, that clones are aged out
+ * only if they are not in use now.
+ */
+ rt6_age_exceptions(rt, gc_args, now);
+
return 0;
}
@@ -1993,7 +2062,8 @@ static int __net_init fib6_net_init(struct net *net)
goto out_fib_table_hash;
net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
- net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -2004,7 +2074,8 @@ static int __net_init fib6_net_init(struct net *net)
if (!net->ipv6.fib6_local_tbl)
goto out_fib6_main_tbl;
net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
- net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -2134,7 +2205,9 @@ static int ipv6_route_yield(struct fib6_walker *w)
return 1;
do {
- iter->w.leaf = iter->w.leaf->dst.rt6_next;
+ iter->w.leaf = rcu_dereference_protected(
+ iter->w.leaf->dst.rt6_next,
+ lockdep_is_held(&iter->tbl->tb6_lock));
iter->skip--;
if (!iter->skip && iter->w.leaf)
return 1;
@@ -2199,7 +2272,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = ((struct rt6_info *)v)->dst.rt6_next;
+ n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
if (n) {
++*pos;
return n;
@@ -2207,9 +2280,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
iter_table:
ipv6_route_check_sernum(iter);
- read_lock(&iter->tbl->tb6_lock);
+ spin_lock_bh(&iter->tbl->tb6_lock);
r = fib6_walk_continue(&iter->w);
- read_unlock(&iter->tbl->tb6_lock);
+ spin_unlock_bh(&iter->tbl->tb6_lock);
if (r > 0) {
if (v)
++*pos;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 8081baf..15535ee 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
}
opt_space->dst1opt = fopt->dst1opt;
opt_space->opt_flen = fopt->opt_flen;
+ opt_space->tot_len = fopt->tot_len;
return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4b58f964..3e10c51 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
- break;
+ if (code != ICMPV6_PORT_UNREACH)
+ break;
+ return;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
+ break;
}
- break;
+ return;
case ICMPV6_PARAMPROB:
teli = 0;
if (code == ICMPV6_HDR_FIELD)
@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
}
- break;
+ return;
case ICMPV6_PKT_TOOBIG:
mtu = be32_to_cpu(info) - offset - t->tun_hlen;
if (t->dev->type == ARPHRD_ETHER)
@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
- break;
+ return;
}
if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -500,8 +503,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- __be16 protocol = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : proto;
+ struct dst_entry *dst = skb_dst(skb);
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -515,9 +518,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
tunnel->o_seqno++;
/* Push GRE header. */
+ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (dst && dst_mtu(dst) > dst->dev->mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
}
@@ -1313,6 +1321,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
}
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 713676f..0204549 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/icmpv6.h>
#include <linux/mutex.h>
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cdb3728..4a87f94 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
- if (gso_partial)
+ if (gso_partial && skb_is_gso(skb))
payload_len = skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 43ca864..5110a41 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1161,11 +1161,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (WARN_ON(v6_cork->opt))
return -EINVAL;
- v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
+ v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
if (unlikely(!v6_cork->opt))
return -ENOBUFS;
- v6_cork->opt->tot_len = opt->tot_len;
+ v6_cork->opt->tot_len = sizeof(*opt);
v6_cork->opt->opt_flen = opt->opt_flen;
v6_cork->opt->opt_nflen = opt->opt_nflen;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e0f9bc0..439d65f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -593,6 +593,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case NDISC_REDIRECT:
rel_type = ICMP_REDIRECT;
rel_code = ICMP_REDIR_HOST;
+ /* fall through */
default:
return 0;
}
@@ -769,7 +770,8 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
if ((ipv6_addr_is_multicast(laddr) ||
likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
- likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
+ ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
+ likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
ret = 1;
}
return ret;
@@ -999,7 +1001,8 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
- else if (!ipv6_addr_is_multicast(raddr) &&
+ else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+ !ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
@@ -1043,6 +1046,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
struct dst_entry *dst = NULL, *ndst = NULL;
struct net_device *tdev;
int mtu;
+ unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
unsigned int max_headroom = psh_hlen;
bool use_cache = false;
@@ -1124,7 +1128,7 @@ route_lookup:
t->parms.name);
goto tx_err_dst_release;
}
- mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
+ mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
@@ -1133,7 +1137,7 @@ route_lookup:
mtu = IPV6_MIN_MTU;
if (skb_dst(skb) && !t->parms.collect_md)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
+ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 714914d..dbb74f3 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
struct xfrm_state *x;
+ int pkt_len = skb->len;
int err = -1;
int mtu;
@@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += skb->len;
+ tstats->tx_bytes += pkt_len;
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
} else {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f5500f5..59fad81 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1722,6 +1722,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
parent = -1;
+ /* fall through */
case MRT6_ADD_MFC_PROXY:
case MRT6_DEL_MFC_PROXY:
if (optlen < sizeof(mfc))
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 266a530..f9c3ffe 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -46,6 +46,7 @@
#endif
#include <linux/if_addr.h>
+#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
@@ -822,7 +823,7 @@ have_ifp:
* who is doing DAD
* so fail our DAD process
*/
- addrconf_dad_failure(ifp);
+ addrconf_dad_failure(skb, ifp);
return;
} else {
/*
@@ -975,7 +976,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (ifp) {
if (skb->pkt_type != PACKET_LOOPBACK
&& (ifp->flags & IFA_F_TENTATIVE)) {
- addrconf_dad_failure(ifp);
+ addrconf_dad_failure(skb, ifp);
return;
}
/* What should we make now? The advertisement
@@ -989,8 +990,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
*/
if (skb->pkt_type != PACKET_LOOPBACK)
ND_PRINTK(1, warn,
- "NA: someone advertises our address %pI6 on %s!\n",
- &ifp->addr, ifp->idev->dev->name);
+ "NA: %pM advertised our address %pI6c on %s!\n",
+ eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
in6_ifa_put(ifp);
return;
}
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index fe180c9..c6ee0cd 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the netfilter modules on top of IPv6.
#
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index a5cd43d..437af8c 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
nexthdr = ipv6_hdr(skb)->nexthdr;
thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
- if (thoff < 0)
+ if (thoff < 0 || nexthdr != IPPROTO_TCP)
return NF_ACCEPT;
th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b263bf3..977d890 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
-static void nf_ct_frag6_expire(unsigned long data)
+static void nf_ct_frag6_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
ip6_expire_frag_queue(net, fq, &nf_frags);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 46d6dba..1d2fb92 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
else
return NF_ACCEPT;
}
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ /* Only ICMPs can be IP_CT_IS_REPLY: */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ac826dd..d12c55d 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6,
- (struct icmp6hdr *) &pfh.icmph,
- len);
+ icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *)&pfh.icmph, len);
}
release_sock(sk);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e4462b0..761a473 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1055,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return ipv6_setsockopt(sk, level, optname, optval, optlen);
}
@@ -1077,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return compat_ipv6_setsockopt(sk, level, optname,
optval, optlen);
@@ -1138,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
@@ -1160,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return compat_ipv6_getsockopt(sk, level, optname,
optval, optlen);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 846012e..afbc000 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -170,12 +170,13 @@ out:
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
-static void ip6_frag_expire(unsigned long data)
+static void ip6_frag_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
ip6_expire_frag_queue(net, fq, &ip6_frags);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 26cc9f4..70d9659 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -44,6 +44,7 @@
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
+#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
@@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net,
struct in6_addr *dst, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -139,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
+ struct net *net = dev_net(rt->dst.dev);
spin_lock_bh(&ul->lock);
list_del(&rt->rt6i_uncached);
+ atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
spin_unlock_bh(&ul->lock);
}
}
@@ -355,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
1, DST_OBSOLETE_FORCE_CHK, flags);
- if (rt)
+ if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
+ }
return rt;
}
@@ -369,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net,
if (rt) {
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
- if (rt->rt6i_pcpu) {
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct rt6_info **p;
-
- p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
- /* no one shares rt */
- *p = NULL;
- }
- } else {
+ if (!rt->rt6i_pcpu) {
dst_release_immediate(&rt->dst);
return NULL;
}
@@ -392,6 +390,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_exception_bucket *bucket;
struct dst_entry *from = dst->from;
struct inet6_dev *idev;
@@ -404,6 +403,11 @@ static void ip6_dst_destroy(struct dst_entry *dst)
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
+ if (bucket) {
+ rt->rt6i_exception_bucket = NULL;
+ kfree(bucket);
+ }
dst->from = NULL;
dst_release(from);
@@ -478,7 +482,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
}
/*
- * Route lookup. Any table->tb6_lock is implied.
+ * Route lookup. rcu_read_lock() should be held.
*/
static inline struct rt6_info *rt6_device_match(struct net *net,
@@ -493,7 +497,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (!oif && ipv6_addr_any(saddr))
goto out;
- for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
+ for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
struct net_device *dev = sprt->dst.dev;
if (oif) {
@@ -702,6 +706,7 @@ out:
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+ struct rt6_info *leaf,
struct rt6_info *rr_head,
u32 metric, int oif, int strict,
bool *do_rr)
@@ -711,7 +716,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
cont = NULL;
- for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+ for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -720,7 +725,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
- for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ for (rt = leaf; rt && rt != rr_head;
+ rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -732,37 +738,59 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rt->dst.rt6_next)
+ for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
}
-static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+ int oif, int strict)
{
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6_info *match, *rt0;
- struct net *net;
bool do_rr = false;
+ int key_plen;
- rt0 = fn->rr_ptr;
+ if (!leaf || leaf == net->ipv6.ip6_null_entry)
+ return net->ipv6.ip6_null_entry;
+
+ rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
- fn->rr_ptr = rt0 = fn->leaf;
+ rt0 = leaf;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+ /* Double check to make sure fn is not an intermediate node
+ * and fn->leaf does not points to its child's leaf
+ * (This might happen if all routes under fn are deleted from
+ * the tree and fib6_repair_tree() is called on the node.)
+ */
+ key_plen = rt0->rt6i_dst.plen;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt0->rt6i_src.plen)
+ key_plen = rt0->rt6i_src.plen;
+#endif
+ if (fn->fn_bit != key_plen)
+ return net->ipv6.ip6_null_entry;
+
+ match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rt0->dst.rt6_next;
+ struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
- next = fn->leaf;
-
- if (next != rt0)
- fn->rr_ptr = next;
+ next = leaf;
+
+ if (next != rt0) {
+ spin_lock_bh(&leaf->rt6i_table->tb6_lock);
+ /* make sure next is not being deleted from the tree */
+ if (next->rt6i_node)
+ rcu_assign_pointer(fn->rr_ptr, next);
+ spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
+ }
}
- net = dev_net(rt0->dst.dev);
return match ? match : net->ipv6.ip6_null_entry;
}
@@ -850,13 +878,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
struct in6_addr *saddr)
{
- struct fib6_node *pn;
+ struct fib6_node *pn, *sn;
while (1) {
if (fn->fn_flags & RTN_TL_ROOT)
return NULL;
- pn = fn->parent;
- if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
- fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+ pn = rcu_dereference(fn->parent);
+ sn = FIB6_SUBTREE(pn);
+ if (sn && sn != fn)
+ fn = fib6_lookup(sn, NULL, saddr);
else
fn = pn;
if (fn->fn_flags & RTN_RTINFO)
@@ -864,29 +893,59 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
}
}
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
+ bool null_fallback)
+{
+ struct rt6_info *rt = *prt;
+
+ if (dst_hold_safe(&rt->dst))
+ return true;
+ if (null_fallback) {
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ } else {
+ rt = NULL;
+ }
+ *prt = rt;
+ return false;
+}
+
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6, int flags)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
- struct rt6_info *rt;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- rt = fn->leaf;
- rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
- if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
+ rt = rcu_dereference(fn->leaf);
+ if (!rt) {
+ rt = net->ipv6.ip6_null_entry;
+ } else {
+ rt = rt6_device_match(net, rt, &fl6->saddr,
+ fl6->flowi6_oif, flags);
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6,
+ fl6->flowi6_oif, flags);
+ }
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
}
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
+ /* Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
+
+ if (ip6_hold_safe(net, &rt, true))
+ dst_use_noref(&rt->dst, jiffies);
+
+ rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
@@ -938,9 +997,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
struct fib6_table *table;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
return err;
}
@@ -1038,7 +1097,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
return pcpu_rt;
}
-/* It should be called with read_lock_bh(&tb6_lock) acquired */
+/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
struct rt6_info *pcpu_rt, **p;
@@ -1046,16 +1105,14 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
p = this_cpu_ptr(rt->rt6i_pcpu);
pcpu_rt = *p;
- if (pcpu_rt) {
- dst_hold(&pcpu_rt->dst);
+ if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
rt6_dst_from_metrics_check(pcpu_rt);
- }
+
return pcpu_rt;
}
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
- struct fib6_table *table = rt->rt6i_table;
struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1066,36 +1123,526 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return net->ipv6.ip6_null_entry;
}
- read_lock_bh(&table->tb6_lock);
- if (rt->rt6i_pcpu) {
- p = this_cpu_ptr(rt->rt6i_pcpu);
- prev = cmpxchg(p, NULL, pcpu_rt);
- if (prev) {
- /* If someone did it before us, return prev instead */
- dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = prev;
- }
- } else {
- /* rt has been removed from the fib6 tree
- * before we have a chance to acquire the read_lock.
- * In this case, don't brother to create a pcpu rt
- * since rt is going away anyway. The next
- * dst_check() will trigger a re-lookup.
- */
- dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = rt;
- }
dst_hold(&pcpu_rt->dst);
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ prev = cmpxchg(p, NULL, pcpu_rt);
+ BUG_ON(prev);
+
rt6_dst_from_metrics_check(pcpu_rt);
- read_unlock_bh(&table->tb6_lock);
return pcpu_rt;
}
+/* exception hash table implementation
+ */
+static DEFINE_SPINLOCK(rt6_exception_lock);
+
+/* Remove rt6_ex from hash table and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
+ struct rt6_exception *rt6_ex)
+{
+ struct net *net;
+
+ if (!bucket || !rt6_ex)
+ return;
+
+ net = dev_net(rt6_ex->rt6i->dst.dev);
+ rt6_ex->rt6i->rt6i_node = NULL;
+ hlist_del_rcu(&rt6_ex->hlist);
+ rt6_release(rt6_ex->rt6i);
+ kfree_rcu(rt6_ex, rcu);
+ WARN_ON_ONCE(!bucket->depth);
+ bucket->depth--;
+ net->ipv6.rt6_stats->fib_rt_cache--;
+}
+
+/* Remove oldest rt6_ex in bucket and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
+{
+ struct rt6_exception *rt6_ex, *oldest = NULL;
+
+ if (!bucket)
+ return;
+
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
+ oldest = rt6_ex;
+ }
+ rt6_remove_exception(bucket, oldest);
+}
+
+static u32 rt6_exception_hash(const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ static u32 seed __read_mostly;
+ u32 val;
+
+ net_get_random_once(&seed, sizeof(seed));
+ val = jhash(dst, sizeof(*dst), seed);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (src)
+ val = jhash(src, sizeof(*src), val);
+#endif
+ return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rt6_exception_lock
+ */
+static struct rt6_exception *
+__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct rt6_exception *rt6_ex;
+ u32 hval;
+
+ if (!(*bucket) || !daddr)
+ return NULL;
+
+ hval = rt6_exception_hash(daddr, saddr);
+ *bucket += hval;
+
+ hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
+ struct rt6_info *rt6 = rt6_ex->rt6i;
+ bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (matched && saddr)
+ matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+ if (matched)
+ return rt6_ex;
+ }
+ return NULL;
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rcu_read_lock()
+ */
+static struct rt6_exception *
+__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct rt6_exception *rt6_ex;
+ u32 hval;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (!(*bucket) || !daddr)
+ return NULL;
+
+ hval = rt6_exception_hash(daddr, saddr);
+ *bucket += hval;
+
+ hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
+ struct rt6_info *rt6 = rt6_ex->rt6i;
+ bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (matched && saddr)
+ matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+ if (matched)
+ return rt6_ex;
+ }
+ return NULL;
+}
+
+static int rt6_insert_exception(struct rt6_info *nrt,
+ struct rt6_info *ort)
+{
+ struct net *net = dev_net(ort->dst.dev);
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ int err = 0;
+
+ /* ort can't be a cache or pcpu route */
+ if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
+ ort = (struct rt6_info *)ort->dst.from;
+ WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
+
+ spin_lock_bh(&rt6_exception_lock);
+
+ if (ort->exception_bucket_flushed) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+ if (!bucket) {
+ bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
+ GFP_ATOMIC);
+ if (!bucket) {
+ err = -ENOMEM;
+ goto out;
+ }
+ rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ }
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates ort is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (ort->rt6i_src.plen)
+ src_key = &nrt->rt6i_src.addr;
+#endif
+
+ /* Update rt6i_prefsrc as it could be changed
+ * in rt6_remove_prefsrc()
+ */
+ nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+ /* rt6_mtu_change() might lower mtu on ort.
+ * Only insert this exception route if its mtu
+ * is less than ort's mtu value.
+ */
+ if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex)
+ rt6_remove_exception(bucket, rt6_ex);
+
+ rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
+ if (!rt6_ex) {
+ err = -ENOMEM;
+ goto out;
+ }
+ rt6_ex->rt6i = nrt;
+ rt6_ex->stamp = jiffies;
+ atomic_inc(&nrt->rt6i_ref);
+ nrt->rt6i_node = ort->rt6i_node;
+ hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
+ bucket->depth++;
+ net->ipv6.rt6_stats->fib_rt_cache++;
+
+ if (bucket->depth > FIB6_MAX_DEPTH)
+ rt6_exception_remove_oldest(bucket);
+
+out:
+ spin_unlock_bh(&rt6_exception_lock);
+
+ /* Update fn->fn_sernum to invalidate all cached dst */
+ if (!err) {
+ fib6_update_sernum(ort);
+ fib6_force_start_gc(net);
+ }
+
+ return err;
+}
+
+void rt6_flush_exceptions(struct rt6_info *rt)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ spin_lock_bh(&rt6_exception_lock);
+ /* Prevent rt6_insert_exception() to recreate the bucket list */
+ rt->exception_bucket_flushed = 1;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+ if (!bucket)
+ goto out;
+
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
+ rt6_remove_exception(bucket, rt6_ex);
+ WARN_ON_ONCE(bucket->depth);
+ bucket++;
+ }
+
+out:
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
+/* Find cached rt in the hash table inside passed in rt
+ * Caller has to hold rcu_read_lock()
+ */
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr)
+{
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ struct rt6_info *res = NULL;
+
+ bucket = rcu_dereference(rt->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates rt is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (rt->rt6i_src.plen)
+ src_key = saddr;
+#endif
+ rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
+
+ if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
+ res = rt6_ex->rt6i;
+
+ return res;
+}
+
+/* Remove the passed in cached rt from the hash table that contains it */
+int rt6_remove_exception_rt(struct rt6_info *rt)
+{
+ struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ int err;
+
+ if (!from ||
+ !(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
+
+ if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ return -ENOENT;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates 'from' is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (from->rt6i_src.plen)
+ src_key = &rt->rt6i_src.addr;
+#endif
+ rt6_ex = __rt6_find_exception_spinlock(&bucket,
+ &rt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex) {
+ rt6_remove_exception(bucket, rt6_ex);
+ err = 0;
+ } else {
+ err = -ENOENT;
+ }
+
+ spin_unlock_bh(&rt6_exception_lock);
+ return err;
+}
+
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+
+ if (!from ||
+ !(rt->rt6i_flags & RTF_CACHE))
+ return;
+
+ rcu_read_lock();
+ bucket = rcu_dereference(from->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates 'from' is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (from->rt6i_src.plen)
+ src_key = &rt->rt6i_src.addr;
+#endif
+ rt6_ex = __rt6_find_exception_rcu(&bucket,
+ &rt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex)
+ rt6_ex->stamp = jiffies;
+
+ rcu_read_unlock();
+}
+
+static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
+ }
+ bucket++;
+ }
+ }
+}
+
+static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+ /* For RTF_CACHE with rt6i_pmtu == 0
+ * (i.e. a redirected route),
+ * the metrics of its rt->dst.from has already
+ * been updated.
+ */
+ if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
+ entry->rt6i_pmtu = mtu;
+ }
+ bucket++;
+ }
+ }
+}
+
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+ struct in6_addr *gateway)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+
+ if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
+ RTF_CACHE_GATEWAY &&
+ ipv6_addr_equal(gateway,
+ &entry->rt6i_gateway)) {
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ }
+ bucket++;
+ }
+ }
+
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
+static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
+ struct rt6_exception *rt6_ex,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_info *rt = rt6_ex->rt6i;
+
+ /* we are pruning and obsoleting aged-out and non gateway exceptions
+ * even if others have still references to them, so that on next
+ * dst_check() such references can be dropped.
+ * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
+ * expired, independently from their aging, as per RFC 8201 section 4
+ */
+ if (!(rt->rt6i_flags & RTF_EXPIRES) &&
+ time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ RT6_TRACE("aging clone %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ struct neighbour *neigh;
+ __u8 neigh_flags = 0;
+
+ neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+ if (neigh) {
+ neigh_flags = neigh->flags;
+ neigh_release(neigh);
+ }
+ if (!(neigh_flags & NTF_ROUTER)) {
+ RT6_TRACE("purging route %p via non-router but gateway\n",
+ rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ } else if (__rt6_check_expired(rt)) {
+ RT6_TRACE("purging expired route %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ gc_args->more++;
+}
+
+void rt6_age_exceptions(struct rt6_info *rt,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ rt6_age_examine_exception(bucket, rt6_ex,
+ gc_args, now);
+ }
+ bucket++;
+ }
+ }
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1103,7 +1650,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
@@ -1112,7 +1659,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
oif = 0;
redo_rt6_select:
- rt = rt6_select(fn, oif, strict);
+ rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
rt = rt6_multipath_select(rt, fl6, oif, strict);
if (rt == net->ipv6.ip6_null_entry) {
@@ -1127,14 +1674,23 @@ redo_rt6_select:
}
}
+ /*Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
- if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
-
- rt6_dst_from_metrics_check(rt);
-
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ if (rt == net->ipv6.ip6_null_entry) {
+ rcu_read_unlock();
+ dst_hold(&rt->dst);
+ trace_fib6_table_lookup(net, rt, table, fl6);
+ return rt;
+ } else if (rt->rt6i_flags & RTF_CACHE) {
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ rt6_dst_from_metrics_check(rt);
+ }
+ rcu_read_unlock();
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!(rt->rt6i_flags & RTF_GATEWAY))) {
@@ -1146,8 +1702,14 @@ redo_rt6_select:
struct rt6_info *uncached_rt;
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ } else {
+ rcu_read_unlock();
+ uncached_rt = rt;
+ goto uncached_rt_out;
+ }
+ rcu_read_unlock();
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
dst_release(&rt->dst);
@@ -1157,12 +1719,14 @@ redo_rt6_select:
* No need for another dst_hold()
*/
rt6_uncached_list_add(uncached_rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
} else {
uncached_rt = net->ipv6.ip6_null_entry;
dst_hold(&uncached_rt->dst);
}
- trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
+uncached_rt_out:
+ trace_fib6_table_lookup(net, uncached_rt, table, fl6);
return uncached_rt;
} else {
@@ -1170,26 +1734,28 @@ redo_rt6_select:
struct rt6_info *pcpu_rt;
- rt->dst.lastuse = jiffies;
- rt->dst.__use++;
+ dst_use_noref(&rt->dst, jiffies);
+ local_bh_disable();
pcpu_rt = rt6_get_pcpu_route(rt);
- if (pcpu_rt) {
- read_unlock_bh(&table->tb6_lock);
- } else {
- /* We have to do the read_unlock first
- * because rt6_make_pcpu_route() may trigger
- * ip6_dst_gc() which will take the write_lock.
- */
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- pcpu_rt = rt6_make_pcpu_route(rt);
- dst_release(&rt->dst);
+ if (!pcpu_rt) {
+ /* atomic_inc_not_zero() is needed when using rcu */
+ if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+ /* No dst_hold() on rt is needed because grabbing
+ * rt->rt6i_ref makes sure rt can't be released.
+ */
+ pcpu_rt = rt6_make_pcpu_route(rt);
+ rt6_release(rt);
+ } else {
+ /* rt is already removed from tree */
+ pcpu_rt = net->ipv6.ip6_null_entry;
+ dst_hold(&pcpu_rt->dst);
+ }
}
-
- trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
+ local_bh_enable();
+ rcu_read_unlock();
+ trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
return pcpu_rt;
-
}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
@@ -1325,9 +1891,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
struct dst_entry *new = NULL;
rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
- DST_OBSOLETE_NONE, 0);
+ DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
new = &rt->dst;
new->__use = 1;
@@ -1491,23 +2058,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (!rt6_cache_allowed_for_pmtu(rt6)) {
rt6_do_update_pmtu(rt6, mtu);
+ /* update rt6_ex->stamp for cache */
+ if (rt6->rt6i_flags & RTF_CACHE)
+ rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
struct rt6_info *nrt6;
nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
-
- /* ip6_ins_rt(nrt6) will bump the
- * rt6->rt6i_node->fn_sernum
- * which will fail the next rt6_check() and
- * invalidate the sk->sk_dst_cache.
- */
- ip6_ins_rt(nrt6);
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt6->dst);
+ if (rt6_insert_exception(nrt6, rt6))
+ dst_release_immediate(&nrt6->dst);
}
}
}
@@ -1571,7 +2132,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
/* Get the "current" route for this destination and
@@ -1584,10 +2145,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
* routes.
*/
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt6_check_expired(rt))
continue;
if (rt->dst.error)
@@ -1596,8 +2157,23 @@ restart:
continue;
if (fl6->flowi6_oif != rt->dst.dev->ifindex)
continue;
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+ rt_cache = rt6_find_cached_rt(rt,
+ &fl6->daddr,
+ &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(&rdfl->gateway,
+ &rt_cache->rt6i_gateway)) {
+ rt = rt_cache;
+ break;
+ }
continue;
+ }
break;
}
@@ -1615,11 +2191,11 @@ restart:
}
out:
- dst_hold(&rt->dst);
+ ip6_hold_safe(net, &rt, true);
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
};
@@ -1766,6 +2342,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
* do proper release of the net_device
*/
rt6_uncached_list_add(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
@@ -1901,6 +2478,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
+ /* RTF_CACHE is an internal flag; can not be set by userspace */
+ if (cfg->fc_flags & RTF_CACHE) {
+ NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
+ goto out;
+ }
+
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
goto out;
@@ -2216,9 +2799,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
}
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_del(rt, info);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out:
ip6_rt_put(rt);
@@ -2244,7 +2827,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
if (rt == net->ipv6.ip6_null_entry)
goto out_put;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
struct rt6_info *sibling, *next_sibling;
@@ -2274,7 +2857,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
err = fib6_del(rt, info);
out_unlock:
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out_put:
ip6_rt_put(rt);
@@ -2288,9 +2871,9 @@ out_put:
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_table *table;
struct fib6_node *fn;
- struct rt6_info *rt;
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
@@ -2299,17 +2882,22 @@ static int ip6_route_del(struct fib6_config *cfg,
return err;
}
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_locate(&table->tb6_root,
&cfg->fc_dst, cfg->fc_dst_len,
- &cfg->fc_src, cfg->fc_src_len);
+ &cfg->fc_src, cfg->fc_src_len,
+ !(cfg->fc_flags & RTF_CACHE));
if (fn) {
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if ((rt->rt6i_flags & RTF_CACHE) &&
- !(cfg->fc_flags & RTF_CACHE))
- continue;
+ for_each_fib6_node_rt_rcu(fn) {
+ if (cfg->fc_flags & RTF_CACHE) {
+ rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+ &cfg->fc_src);
+ if (!rt_cache)
+ continue;
+ rt = rt_cache;
+ }
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -2321,8 +2909,9 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
continue;
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
+ if (!dst_hold_safe(&rt->dst))
+ break;
+ rcu_read_unlock();
/* if gateway was specified only delete the one hop */
if (cfg->fc_flags & RTF_GATEWAY)
@@ -2331,7 +2920,7 @@ static int ip6_route_del(struct fib6_config *cfg,
return __ip6_del_rt_siblings(rt, cfg);
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return err;
}
@@ -2435,8 +3024,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_protocol = RTPROT_REDIRECT;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- if (ip6_ins_rt(nrt))
- goto out_release;
+ /* No need to remove rt from the exception table if rt is
+ * a cached route because rt6_insert_exception() will
+ * takes care of it
+ */
+ if (rt6_insert_exception(nrt, rt)) {
+ dst_release_immediate(&nrt->dst);
+ goto out;
+ }
netevent.old = &rt->dst;
netevent.new = &nrt->dst;
@@ -2444,17 +3039,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
netevent.neigh = neigh;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
- if (rt->rt6i_flags & RTF_CACHE) {
- rt = (struct rt6_info *) dst_clone(&rt->dst);
- ip6_del_rt(rt);
- }
-
-out_release:
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt->dst);
-
out:
neigh_release(neigh);
}
@@ -2511,23 +3095,23 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
+ rcu_read_lock();
+ fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
if (!fn)
goto out;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != ifindex)
continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue;
if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
continue;
- dst_hold(&rt->dst);
+ ip6_hold_safe(NULL, &rt, false);
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -2573,16 +3157,16 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
break;
}
if (rt)
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
+ ip6_hold_safe(NULL, &rt, false);
+ rcu_read_unlock();
return rt;
}
@@ -2620,17 +3204,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
struct rt6_info *rt;
restart:
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
(!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- ip6_del_rt(rt);
+ if (dst_hold_safe(&rt->dst)) {
+ rcu_read_unlock();
+ ip6_del_rt(rt);
+ } else {
+ rcu_read_unlock();
+ }
goto restart;
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
@@ -2818,8 +3405,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
if (((void *)rt->dst.dev == dev || !dev) &&
rt != net->ipv6.ip6_null_entry &&
ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+ spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->rt6i_prefsrc.plen = 0;
+ /* need to update cache as well */
+ rt6_exceptions_remove_prefsrc(rt);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
@@ -2836,18 +3427,23 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
}
#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
-#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
- if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
- ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
- ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
return -1;
}
+
+ /* Further clean up cached routes in exception table.
+ * This is needed because cached route may have a different
+ * gateway than its 'parent' in the case of an ip redirect.
+ */
+ rt6_exceptions_clean_tohost(rt, gateway);
+
return 0;
}
@@ -2926,19 +3522,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
if (rt->dst.dev == arg->dev &&
dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
- if (rt->rt6i_flags & RTF_CACHE) {
- /* For RTF_CACHE with rt6i_pmtu == 0
- * (i.e. a redirected route),
- * the metrics of its rt->dst.from has already
- * been updated.
- */
- if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
- rt->rt6i_pmtu = arg->mtu;
- } else if (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ spin_lock_bh(&rt6_exception_lock);
+ if (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
+ rt6_exceptions_update_pmtu(rt, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
@@ -3839,7 +4430,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
net->ipv6.rt6_stats->fib_nodes,
net->ipv6.rt6_stats->fib_route_nodes,
- net->ipv6.rt6_stats->fib_rt_alloc,
+ atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
net->ipv6.rt6_stats->fib_rt_entries,
net->ipv6.rt6_stats->fib_rt_cache,
dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a799f52..d60ddcb 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -91,29 +91,35 @@ struct sit_net {
* Must be invoked with rcu_read_lock
*/
static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
- struct net_device *dev, __be32 remote, __be32 local)
+ struct net_device *dev,
+ __be32 remote, __be32 local,
+ int sifindex)
{
unsigned int h0 = HASH(remote);
unsigned int h1 = HASH(local);
struct ip_tunnel *t;
struct sit_net *sitn = net_generic(net, sit_net_id);
+ int ifindex = dev ? dev->ifindex : 0;
for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+ (!dev || !t->parms.link || ifindex == t->parms.link ||
+ sifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+ (!dev || !t->parms.link || ifindex == t->parms.link ||
+ sifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
- (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
+ (!dev || !t->parms.link || ifindex == t->parms.link ||
+ sifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
@@ -486,6 +492,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
const int code = icmp_hdr(skb)->code;
unsigned int data_len = 0;
struct ip_tunnel *t;
+ int sifindex;
int err;
switch (type) {
@@ -517,10 +524,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
- t = ipip6_tunnel_lookup(dev_net(skb->dev),
- skb->dev,
- iph->daddr,
- iph->saddr);
+ sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
+ t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
+ iph->daddr, iph->saddr, sifindex);
if (!t)
goto out;
@@ -633,10 +639,12 @@ static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct ip_tunnel *tunnel;
+ int sifindex;
int err;
+ sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
- iph->saddr, iph->daddr);
+ iph->saddr, iph->daddr, sifindex);
if (tunnel) {
struct pcpu_sw_netstats *tstats;
@@ -704,10 +712,13 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
{
const struct iphdr *iph;
struct ip_tunnel *tunnel;
+ int sifindex;
+
+ sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0;
iph = ip_hdr(skb);
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
- iph->saddr, iph->daddr);
+ iph->saddr, iph->daddr, sifindex);
if (tunnel) {
const struct tnl_ptk_info *tpi;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 4e7817a..e7a3a6b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
- tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 6fbf8ae..a789a8a 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
*
@@ -97,6 +98,34 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "max_dst_opts_number",
+ .data = &init_net.ipv6.sysctl.max_dst_opts_cnt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "max_hbh_opts_number",
+ .data = &init_net.ipv6.sysctl.max_hbh_opts_cnt,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "max_dst_opts_length",
+ .data = &init_net.ipv6.sysctl.max_dst_opts_len,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "max_hbh_length",
+ .data = &init_net.ipv6.sysctl.max_hbh_opts_len,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -157,6 +186,10 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect;
+ ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt;
+ ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
+ ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
+ ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 64d94af..0e25299 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -69,6 +69,8 @@
#include <crypto/hash.h>
#include <linux/scatterlist.h>
+#include <trace/events/tcp.h>
+
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
int genhash;
struct sock *sk1 = NULL;
#endif
- int oif;
+ int oif = 0;
if (th->rst)
return;
@@ -939,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
- oif = sk ? sk->sk_bound_dev_if : 0;
+ if (sk) {
+ oif = sk->sk_bound_dev_if;
+ trace_tcp_send_reset(sk, skb);
+ }
+
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
#ifdef CONFIG_TCP_MD5SIG
@@ -1577,8 +1583,9 @@ do_time_wait:
refcounted = false;
goto process;
}
- /* Fall through to ACK */
}
+ /* to ACK */
+ /* fall through */
case TCP_TW_ACK:
tcp_v6_timewait_ack(sk, skb);
break;
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index f180b3d..7903e21 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _UDP6_IMPL_H
#define _UDP6_IMPL_H
#include <net/udp.h>
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index f95943a..fe04e23 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm6_input.c: based on net/ipv4/xfrm4_input.c
*
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 11d1314..885ade2 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm6_policy.c: based on xfrm4_policy.c
*
@@ -152,6 +153,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
switch (nexthdr) {
case NEXTHDR_FRAGMENT:
onlyproto = 1;
+ /* fall through */
case NEXTHDR_ROUTING:
case NEXTHDR_HOP:
case NEXTHDR_DEST:
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 8a1f9c0..b15075a 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm6_state.c: based on xfrm4_state.c
*
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index ac598ec..d21a9d1 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = -EPERM;
if (!capable(CAP_NET_ADMIN))
break;
+ /* fall through */
case SIOCGIFADDR:
rc = ipxitf_ioctl(cmd, argp);
break;
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 7d75e4c..38a3d51 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IPX proc routines
*
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index b5d9144..3cf93aa9 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Implements the IPX routing routines.
* Code moved from af_ipx.c.
diff --git a/net/ipx/pe2.c b/net/ipx/pe2.c
index 32dcd60..ba7d421 100644
--- a/net/ipx/pe2.c
+++ b/net/ipx/pe2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/module.h>
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index 0dafcc5..c3eef45 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
* sysctl_net_ipx.c: sysctl interface to net IPX subsystem.
*
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index c748e8a6..bd57233 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/list.h>
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 2870f41..399a7e5 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the L2TP.
#
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index ee485df..7c8d1eb 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -100,8 +100,6 @@ struct l2tp_skb_cb {
#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
-static atomic_t l2tp_tunnel_count;
-static atomic_t l2tp_session_count;
static struct workqueue_struct *l2tp_wq;
/* per-net private data for this module */
@@ -216,12 +214,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
-/* Lookup a session. A new reference is held on the returned session.
- * Optionally calls session->ref() too if do_ref is true.
- */
+/* Lookup a session. A new reference is held on the returned session. */
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
- u32 session_id, bool do_ref)
+ u32 session_id)
{
struct hlist_head *session_list;
struct l2tp_session *session;
@@ -235,8 +231,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
hlist_for_each_entry_rcu(session, session_list, global_hlist) {
if (session->session_id == session_id) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
rcu_read_unlock_bh();
return session;
@@ -252,8 +246,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
hlist_for_each_entry(session, session_list, hlist) {
if (session->session_id == session_id) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
read_unlock_bh(&tunnel->hlist_lock);
return session;
@@ -265,8 +257,7 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
}
EXPORT_SYMBOL_GPL(l2tp_session_get);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
- bool do_ref)
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
{
int hash;
struct l2tp_session *session;
@@ -277,8 +268,6 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
if (++count > nth) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
read_unlock_bh(&tunnel->hlist_lock);
return session;
}
@@ -295,8 +284,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
* This is very inefficient but is only used by management interfaces.
*/
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
- const char *ifname,
- bool do_ref)
+ const char *ifname)
{
struct l2tp_net *pn = l2tp_pernet(net);
int hash;
@@ -307,8 +295,6 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
if (!strcmp(session->ifname, ifname)) {
l2tp_session_inc_refcount(session);
- if (do_ref && session->ref)
- session->ref(session);
rcu_read_unlock_bh();
return session;
@@ -322,8 +308,8 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
}
EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
-static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
- struct l2tp_session *session)
+int l2tp_session_register(struct l2tp_session *session,
+ struct l2tp_tunnel *tunnel)
{
struct l2tp_session *session_walk;
struct hlist_head *g_head;
@@ -380,6 +366,7 @@ err_tlock:
return err;
}
+EXPORT_SYMBOL_GPL(l2tp_session_register);
/* Lookup a tunnel by id
*/
@@ -484,9 +471,6 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
(*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
else
kfree_skb(skb);
-
- if (session->deref)
- (*session->deref)(session);
}
/* Dequeue skbs from the session's reorder_q, subject to packet order.
@@ -515,8 +499,6 @@ start:
session->reorder_skip = 1;
__skb_unlink(skb, &session->reorder_q);
kfree_skb(skb);
- if (session->deref)
- (*session->deref)(session);
continue;
}
@@ -689,9 +671,6 @@ discard:
* a data (not control) frame before coming here. Fields up to the
* session-id have already been parsed and ptr points to the data
* after the session-id.
- *
- * session->ref() must have been called prior to l2tp_recv_common().
- * session->deref() will be called automatically after skb is processed.
*/
void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -858,9 +837,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
discard:
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
-
- if (session->deref)
- (*session->deref)(session);
}
EXPORT_SYMBOL(l2tp_recv_common);
@@ -874,8 +850,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session)
while ((skb = skb_dequeue(&session->reorder_q))) {
atomic_long_inc(&session->stats.rx_errors);
kfree_skb(skb);
- if (session->deref)
- (*session->deref)(session);
}
return 0;
}
@@ -967,13 +941,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
/* Find the session context */
- session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
+ session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id);
if (!session || !session->recv_skb) {
- if (session) {
- if (session->deref)
- session->deref(session);
+ if (session)
l2tp_session_dec_refcount(session);
- }
/* Not found? Pass to userspace to deal with */
l2tp_info(tunnel, L2TP_MSG_DATA,
@@ -1274,7 +1245,6 @@ static void l2tp_tunnel_destruct(struct sock *sk)
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
list_del_rcu(&tunnel->list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- atomic_dec(&l2tp_tunnel_count);
l2tp_tunnel_closeall(tunnel);
@@ -1314,8 +1284,8 @@ again:
hlist_del_init(&session->hlist);
- if (session->ref != NULL)
- (*session->ref)(session);
+ if (test_and_set_bit(0, &session->dead))
+ goto again;
write_unlock_bh(&tunnel->hlist_lock);
@@ -1325,9 +1295,6 @@ again:
if (session->session_close != NULL)
(*session->session_close)(session);
- if (session->deref != NULL)
- (*session->deref)(session);
-
l2tp_session_dec_refcount(session);
write_lock_bh(&tunnel->hlist_lock);
@@ -1658,7 +1625,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
/* Add tunnel to our list */
INIT_LIST_HEAD(&tunnel->list);
- atomic_inc(&l2tp_tunnel_count);
/* Bump the reference count. The tunnel context is deleted
* only when this drops to zero. Must be done before list insertion
@@ -1685,14 +1651,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
/* This function is used by the netlink TUNNEL_DELETE command.
*/
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
- l2tp_tunnel_inc_refcount(tunnel);
- if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
- l2tp_tunnel_dec_refcount(tunnel);
- return 1;
+ if (!test_and_set_bit(0, &tunnel->dead)) {
+ l2tp_tunnel_inc_refcount(tunnel);
+ queue_work(l2tp_wq, &tunnel->del_work);
}
- return 0;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
@@ -1706,8 +1670,6 @@ void l2tp_session_free(struct l2tp_session *session)
if (tunnel) {
BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
- if (session->session_id != 0)
- atomic_dec(&l2tp_session_count);
sock_put(tunnel->sock);
session->tunnel = NULL;
l2tp_tunnel_dec_refcount(tunnel);
@@ -1750,15 +1712,16 @@ EXPORT_SYMBOL_GPL(__l2tp_session_unhash);
*/
int l2tp_session_delete(struct l2tp_session *session)
{
- if (session->ref)
- (*session->ref)(session);
+ if (test_and_set_bit(0, &session->dead))
+ return 0;
+
__l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
if (session->session_close != NULL)
(*session->session_close)(session);
- if (session->deref)
- (*session->deref)(session);
+
l2tp_session_dec_refcount(session);
+
return 0;
}
EXPORT_SYMBOL_GPL(l2tp_session_delete);
@@ -1784,7 +1747,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct l2tp_session *session;
- int err;
session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
if (session != NULL) {
@@ -1842,17 +1804,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
refcount_set(&session->ref_count, 1);
- err = l2tp_session_add_to_tunnel(tunnel, session);
- if (err) {
- kfree(session);
-
- return ERR_PTR(err);
- }
-
- /* Ignore management session in session count value */
- if (session->session_id != 0)
- atomic_inc(&l2tp_session_count);
-
return session;
}
@@ -1887,7 +1838,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
rcu_read_lock_bh();
list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- (void)l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a305e0c..9534e16 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -76,6 +76,7 @@ struct l2tp_session_cfg {
struct l2tp_session {
int magic; /* should be
* L2TP_SESSION_MAGIC */
+ long dead;
struct l2tp_tunnel *tunnel; /* back pointer to tunnel
* context */
@@ -128,8 +129,6 @@ struct l2tp_session {
int (*build_header)(struct l2tp_session *session, void *buf);
void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
void (*session_close)(struct l2tp_session *session);
- void (*ref)(struct l2tp_session *session);
- void (*deref)(struct l2tp_session *session);
#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
void (*show)(struct seq_file *m, void *priv);
#endif
@@ -160,6 +159,9 @@ struct l2tp_tunnel_cfg {
struct l2tp_tunnel {
int magic; /* Should be L2TP_TUNNEL_MAGIC */
+
+ unsigned long dead;
+
struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
bool acpt_newsess; /* Indicates whether this
@@ -241,12 +243,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
- u32 session_id, bool do_ref);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
- bool do_ref);
+ u32 session_id);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
- const char *ifname,
- bool do_ref);
+ const char *ifname);
struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
@@ -254,11 +254,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
struct l2tp_tunnel **tunnelp);
void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_create(int priv_size,
struct l2tp_tunnel *tunnel,
u32 session_id, u32 peer_session_id,
struct l2tp_session_cfg *cfg);
+int l2tp_session_register(struct l2tp_session *session,
+ struct l2tp_tunnel *tunnel);
+
void __l2tp_session_unhash(struct l2tp_session *session);
int l2tp_session_delete(struct l2tp_session *session);
void l2tp_session_free(struct l2tp_session *session);
@@ -291,37 +294,17 @@ static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
/* Session reference counts. Incremented when code obtains a reference
* to a session.
*/
-static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_inc_refcount(struct l2tp_session *session)
{
refcount_inc(&session->ref_count);
}
-static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
+static inline void l2tp_session_dec_refcount(struct l2tp_session *session)
{
if (refcount_dec_and_test(&session->ref_count))
l2tp_session_free(session);
}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_session_inc_refcount(_s) \
-do { \
- pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_s)->name, \
- refcount_read(&_s->ref_count)); \
- l2tp_session_inc_refcount_1(_s); \
-} while (0)
-#define l2tp_session_dec_refcount(_s) \
-do { \
- pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_s)->name, \
- refcount_read(&_s->ref_count)); \
- l2tp_session_dec_refcount_1(_s); \
-} while (0)
-#else
-#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
-#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
-#endif
-
#define l2tp_printk(ptr, type, func, fmt, ...) \
do { \
if (((ptr)->debug) & (type)) \
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 53bae54..eb69411 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
if (pd->session == NULL) {
@@ -241,8 +241,6 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
} else {
l2tp_dfs_seq_session_show(m, pd->session);
- if (pd->session->deref)
- pd->session->deref(pd->session);
l2tp_session_dec_refcount(pd->session);
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 87da9ef..3e2dec1 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -41,10 +41,8 @@
/* via netdev_priv() */
struct l2tp_eth {
- struct net_device *dev;
struct sock *tunnel_sock;
struct l2tp_session *session;
- struct list_head list;
atomic_long_t tx_bytes;
atomic_long_t tx_packets;
atomic_long_t tx_dropped;
@@ -55,26 +53,12 @@ struct l2tp_eth {
/* via l2tp_session_priv() */
struct l2tp_eth_sess {
- struct net_device *dev;
+ struct net_device __rcu *dev;
};
-/* per-net private data for this module */
-static unsigned int l2tp_eth_net_id;
-struct l2tp_eth_net {
- struct list_head l2tp_eth_dev_list;
- spinlock_t l2tp_eth_lock;
-};
-
-static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
-{
- return net_generic(net, l2tp_eth_net_id);
-}
static int l2tp_eth_dev_init(struct net_device *dev)
{
- struct l2tp_eth *priv = netdev_priv(dev);
-
- priv->dev = dev;
eth_hw_addr_random(dev);
eth_broadcast_addr(dev->broadcast);
netdev_lockdep_set_classes(dev);
@@ -85,12 +69,13 @@ static int l2tp_eth_dev_init(struct net_device *dev)
static void l2tp_eth_dev_uninit(struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
- struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+ struct l2tp_eth_sess *spriv;
- spin_lock(&pn->l2tp_eth_lock);
- list_del_init(&priv->list);
- spin_unlock(&pn->l2tp_eth_lock);
- dev_put(dev);
+ spriv = l2tp_session_priv(priv->session);
+ RCU_INIT_POINTER(spriv->dev, NULL);
+ /* No need for synchronize_net() here. We're called by
+ * unregister_netdev*(), which does the synchronisation for us.
+ */
}
static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -148,8 +133,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
{
struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
- struct net_device *dev = spriv->dev;
- struct l2tp_eth *priv = netdev_priv(dev);
+ struct net_device *dev;
+ struct l2tp_eth *priv;
if (session->debug & L2TP_MSG_DATA) {
unsigned int length;
@@ -173,16 +158,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
skb_dst_drop(skb);
nf_reset(skb);
+ rcu_read_lock();
+ dev = rcu_dereference(spriv->dev);
+ if (!dev)
+ goto error_rcu;
+
+ priv = netdev_priv(dev);
if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
atomic_long_inc(&priv->rx_packets);
atomic_long_add(data_len, &priv->rx_bytes);
} else {
atomic_long_inc(&priv->rx_errors);
}
+ rcu_read_unlock();
+
return;
+error_rcu:
+ rcu_read_unlock();
error:
- atomic_long_inc(&priv->rx_errors);
kfree_skb(skb);
}
@@ -193,11 +187,15 @@ static void l2tp_eth_delete(struct l2tp_session *session)
if (session) {
spriv = l2tp_session_priv(session);
- dev = spriv->dev;
+
+ rtnl_lock();
+ dev = rtnl_dereference(spriv->dev);
if (dev) {
- unregister_netdev(dev);
- spriv->dev = NULL;
+ unregister_netdevice(dev);
+ rtnl_unlock();
module_put(THIS_MODULE);
+ } else {
+ rtnl_unlock();
}
}
}
@@ -207,9 +205,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
{
struct l2tp_session *session = arg;
struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
- struct net_device *dev = spriv->dev;
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(spriv->dev);
+ if (!dev) {
+ rcu_read_unlock();
+ return;
+ }
+ dev_hold(dev);
+ rcu_read_unlock();
seq_printf(m, " interface %s\n", dev->name);
+
+ dev_put(dev);
}
#endif
@@ -273,7 +282,6 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
struct l2tp_eth *priv;
struct l2tp_eth_sess *spriv;
int rc;
- struct l2tp_eth_net *pn;
if (cfg->ifname) {
strlcpy(name, cfg->ifname, IFNAMSIZ);
@@ -287,14 +295,14 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
peer_session_id, cfg);
if (IS_ERR(session)) {
rc = PTR_ERR(session);
- goto out;
+ goto err;
}
dev = alloc_netdev(sizeof(*priv), name, name_assign_type,
l2tp_eth_dev_setup);
if (!dev) {
rc = -ENOMEM;
- goto out_del_session;
+ goto err_sess;
}
dev_net_set(dev, net);
@@ -303,9 +311,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
- priv->dev = dev;
priv->session = session;
- INIT_LIST_HEAD(&priv->list);
priv->tunnel_sock = tunnel->sock;
session->recv_skb = l2tp_eth_dev_recv;
@@ -315,48 +321,50 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
#endif
spriv = l2tp_session_priv(session);
- spriv->dev = dev;
- rc = register_netdev(dev);
- if (rc < 0)
- goto out_del_dev;
+ l2tp_session_inc_refcount(session);
- __module_get(THIS_MODULE);
- /* Must be done after register_netdev() */
- strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ rtnl_lock();
- dev_hold(dev);
- pn = l2tp_eth_pernet(dev_net(dev));
- spin_lock(&pn->l2tp_eth_lock);
- list_add(&priv->list, &pn->l2tp_eth_dev_list);
- spin_unlock(&pn->l2tp_eth_lock);
+ /* Register both device and session while holding the rtnl lock. This
+ * ensures that l2tp_eth_delete() will see that there's a device to
+ * unregister, even if it happened to run before we assign spriv->dev.
+ */
+ rc = l2tp_session_register(session, tunnel);
+ if (rc < 0) {
+ rtnl_unlock();
+ goto err_sess_dev;
+ }
- return 0;
+ rc = register_netdevice(dev);
+ if (rc < 0) {
+ rtnl_unlock();
+ l2tp_session_delete(session);
+ l2tp_session_dec_refcount(session);
+ free_netdev(dev);
-out_del_dev:
- free_netdev(dev);
- spriv->dev = NULL;
-out_del_session:
- l2tp_session_delete(session);
-out:
- return rc;
-}
+ return rc;
+ }
-static __net_init int l2tp_eth_init_net(struct net *net)
-{
- struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
+ strlcpy(session->ifname, dev->name, IFNAMSIZ);
+ rcu_assign_pointer(spriv->dev, dev);
- INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
- spin_lock_init(&pn->l2tp_eth_lock);
+ rtnl_unlock();
+
+ l2tp_session_dec_refcount(session);
+
+ __module_get(THIS_MODULE);
return 0;
-}
-static struct pernet_operations l2tp_eth_net_ops = {
- .init = l2tp_eth_init_net,
- .id = &l2tp_eth_net_id,
- .size = sizeof(struct l2tp_eth_net),
-};
+err_sess_dev:
+ l2tp_session_dec_refcount(session);
+ free_netdev(dev);
+err_sess:
+ kfree(session);
+err:
+ return rc;
+}
static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
@@ -371,25 +379,18 @@ static int __init l2tp_eth_init(void)
err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
if (err)
- goto out;
-
- err = register_pernet_device(&l2tp_eth_net_ops);
- if (err)
- goto out_unreg;
+ goto err;
pr_info("L2TP ethernet pseudowire support (L2TPv3)\n");
return 0;
-out_unreg:
- l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
-out:
+err:
return err;
}
static void __exit l2tp_eth_exit(void)
{
- unregister_pernet_device(&l2tp_eth_net_ops);
l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 4d322c1..6dbe450 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -143,7 +143,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_get(net, NULL, session_id, true);
+ session = l2tp_session_get(net, NULL, session_id);
if (!session)
goto discard;
@@ -205,8 +205,6 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
goto discard;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 88b397c..59ebb6e 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -156,7 +156,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_get(net, NULL, session_id, true);
+ session = l2tp_session_get(net, NULL, session_id);
if (!session)
goto discard;
@@ -219,8 +219,6 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
goto discard;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 7135f46..a1f24fb 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,8 +48,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
/* Accessed under genl lock */
static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
-static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
- bool do_ref)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
{
u32 tunnel_id;
u32 session_id;
@@ -60,15 +59,14 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
if (info->attrs[L2TP_ATTR_IFNAME]) {
ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
- session = l2tp_session_get_by_ifname(net, ifname, do_ref);
+ session = l2tp_session_get_by_ifname(net, ifname);
} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
(info->attrs[L2TP_ATTR_CONN_ID])) {
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
tunnel = l2tp_tunnel_get(net, tunnel_id);
if (tunnel) {
- session = l2tp_session_get(net, tunnel, session_id,
- do_ref);
+ session = l2tp_session_get(net, tunnel, session_id);
l2tp_tunnel_dec_refcount(tunnel);
}
}
@@ -282,7 +280,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_DELETE);
- (void) l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_delete(tunnel);
l2tp_tunnel_dec_refcount(tunnel);
@@ -406,7 +404,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
goto nla_put_failure;
- /* NOBREAK */
+ /* fall through */
case L2TP_ENCAPTYPE_IP:
#if IS_ENABLED(CONFIG_IPV6)
if (np) {
@@ -649,7 +647,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
&cfg);
if (ret >= 0) {
- session = l2tp_session_get(net, tunnel, session_id, false);
+ session = l2tp_session_get(net, tunnel, session_id);
if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
@@ -669,7 +667,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
struct l2tp_session *session;
u16 pw_type;
- session = l2tp_nl_session_get(info, true);
+ session = l2tp_nl_session_get(info);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -683,8 +681,6 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
out:
@@ -696,7 +692,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
int ret = 0;
struct l2tp_session *session;
- session = l2tp_nl_session_get(info, false);
+ session = l2tp_nl_session_get(info);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -828,7 +824,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int ret;
- session = l2tp_nl_session_get(info, false);
+ session = l2tp_nl_session_get(info);
if (session == NULL) {
ret = -ENODEV;
goto err;
@@ -874,7 +870,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
goto out;
}
- session = l2tp_session_get_nth(tunnel, si, false);
+ session = l2tp_session_get_nth(tunnel, si);
if (session == NULL) {
ti++;
tunnel = NULL;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 50e3ee9..5f5c78b 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -122,8 +122,11 @@
struct pppol2tp_session {
int owner; /* pid that opened the socket */
- struct sock *sock; /* Pointer to the session
+ struct mutex sk_lock; /* Protects .sk */
+ struct sock __rcu *sk; /* Pointer to the session
* PPPoX socket */
+ struct sock *__sk; /* Copy of .sk, for cleanup */
+ struct rcu_head rcu; /* For asynchronous release */
struct sock *tunnel_sock; /* Pointer to the tunnel UDP
* socket */
int flags; /* accessed by PPPIOCGFLAGS.
@@ -138,6 +141,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = {
static const struct proto_ops pppol2tp_ops;
+/* Retrieves the pppol2tp socket associated to a session.
+ * A reference is held on the returned socket, so this function must be paired
+ * with sock_put().
+ */
+static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps = l2tp_session_priv(session);
+ struct sock *sk;
+
+ rcu_read_lock();
+ sk = rcu_dereference(ps->sk);
+ if (sk)
+ sock_hold(sk);
+ rcu_read_unlock();
+
+ return sk;
+}
+
/* Helpers to obtain tunnel/session contexts from sockets.
*/
static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
@@ -224,7 +245,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
/* If the socket is bound, send it in to PPP's input queue. Otherwise
* queue it on the session socket.
*/
- sk = ps->sock;
+ rcu_read_lock();
+ sk = rcu_dereference(ps->sk);
if (sk == NULL)
goto no_sock;
@@ -247,30 +269,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
kfree_skb(skb);
}
}
+ rcu_read_unlock();
return;
no_sock:
+ rcu_read_unlock();
l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name);
kfree_skb(skb);
}
-static void pppol2tp_session_sock_hold(struct l2tp_session *session)
-{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
-
- if (ps->sock)
- sock_hold(ps->sock);
-}
-
-static void pppol2tp_session_sock_put(struct l2tp_session *session)
-{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
-
- if (ps->sock)
- sock_put(ps->sock);
-}
-
/************************************************************************
* Transmit handling
***********************************************************************/
@@ -431,16 +439,15 @@ abort:
*/
static void pppol2tp_session_close(struct l2tp_session *session)
{
- struct pppol2tp_session *ps = l2tp_session_priv(session);
- struct sock *sk = ps->sock;
- struct socket *sock = sk->sk_socket;
+ struct sock *sk;
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
- if (sock) {
- inet_shutdown(sock, SEND_SHUTDOWN);
- /* Don't let the session go away before our socket does */
- l2tp_session_inc_refcount(session);
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ if (sk->sk_socket)
+ inet_shutdown(sk->sk_socket, SEND_SHUTDOWN);
+ sock_put(sk);
}
}
@@ -461,6 +468,14 @@ static void pppol2tp_session_destruct(struct sock *sk)
}
}
+static void pppol2tp_put_sk(struct rcu_head *head)
+{
+ struct pppol2tp_session *ps;
+
+ ps = container_of(head, typeof(*ps), rcu);
+ sock_put(ps->__sk);
+}
+
/* Called when the PPPoX socket (session) is closed.
*/
static int pppol2tp_release(struct socket *sock)
@@ -486,11 +501,23 @@ static int pppol2tp_release(struct socket *sock)
session = pppol2tp_sock_to_session(sk);
- /* Purge any queued data */
if (session != NULL) {
- __l2tp_session_unhash(session);
- l2tp_session_queue_purge(session);
- sock_put(sk);
+ struct pppol2tp_session *ps;
+
+ l2tp_session_delete(session);
+
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ mutex_unlock(&ps->sk_lock);
+ call_rcu(&ps->rcu, pppol2tp_put_sk);
+
+ /* Rely on the sock_put() call at the end of the function for
+ * dropping the reference held by pppol2tp_sock_to_session().
+ * The last reference will be dropped by pppol2tp_put_sk().
+ */
}
release_sock(sk);
@@ -557,16 +584,47 @@ out:
static void pppol2tp_show(struct seq_file *m, void *arg)
{
struct l2tp_session *session = arg;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
+ struct sock *sk;
+
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ struct pppox_sock *po = pppox_sk(sk);
- if (ps) {
- struct pppox_sock *po = pppox_sk(ps->sock);
- if (po)
- seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ sock_put(sk);
}
}
#endif
+static void pppol2tp_session_init(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps;
+ struct dst_entry *dst;
+
+ session->recv_skb = pppol2tp_recv;
+ session->session_close = pppol2tp_session_close;
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
+ session->show = pppol2tp_show;
+#endif
+
+ ps = l2tp_session_priv(session);
+ mutex_init(&ps->sk_lock);
+ ps->tunnel_sock = session->tunnel->sock;
+ ps->owner = current->pid;
+
+ /* If PMTU discovery was enabled, use the MTU that was discovered */
+ dst = sk_dst_get(session->tunnel->sock);
+ if (dst) {
+ u32 pmtu = dst_mtu(dst);
+
+ if (pmtu) {
+ session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+ session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD;
+ }
+ dst_release(dst);
+ }
+}
+
/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
*/
static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -578,12 +636,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
struct l2tp_session *session = NULL;
struct l2tp_tunnel *tunnel;
struct pppol2tp_session *ps;
- struct dst_entry *dst;
struct l2tp_session_cfg cfg = { 0, };
int error = 0;
u32 tunnel_id, peer_tunnel_id;
u32 session_id, peer_session_id;
bool drop_refcnt = false;
+ bool drop_tunnel = false;
int ver = 2;
int fd;
@@ -652,7 +710,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel_id == 0)
goto end;
- tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
+ tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id);
+ if (tunnel)
+ drop_tunnel = true;
/* Special case: create tunnel context if session_id and
* peer_session_id is 0. Otherwise look up tunnel using supplied
@@ -685,7 +745,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel->peer_tunnel_id == 0)
tunnel->peer_tunnel_id = peer_tunnel_id;
- session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+ session = l2tp_session_get(sock_net(sk), tunnel, session_id);
if (session) {
drop_refcnt = true;
ps = l2tp_session_priv(session);
@@ -693,13 +753,17 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Using a pre-existing session is fine as long as it hasn't
* been connected yet.
*/
- if (ps->sock) {
+ mutex_lock(&ps->sk_lock);
+ if (rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock))) {
+ mutex_unlock(&ps->sk_lock);
error = -EEXIST;
goto end;
}
/* consistency checks */
if (ps->tunnel_sock != tunnel->sock) {
+ mutex_unlock(&ps->sk_lock);
error = -EEXIST;
goto end;
}
@@ -715,35 +779,19 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
error = PTR_ERR(session);
goto end;
}
- }
-
- /* Associate session with its PPPoL2TP socket */
- ps = l2tp_session_priv(session);
- ps->owner = current->pid;
- ps->sock = sk;
- ps->tunnel_sock = tunnel->sock;
-
- session->recv_skb = pppol2tp_recv;
- session->session_close = pppol2tp_session_close;
-#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
- session->show = pppol2tp_show;
-#endif
-
- /* We need to know each time a skb is dropped from the reorder
- * queue.
- */
- session->ref = pppol2tp_session_sock_hold;
- session->deref = pppol2tp_session_sock_put;
- /* If PMTU discovery was enabled, use the MTU that was discovered */
- dst = sk_dst_get(tunnel->sock);
- if (dst != NULL) {
- u32 pmtu = dst_mtu(dst);
+ pppol2tp_session_init(session);
+ ps = l2tp_session_priv(session);
+ l2tp_session_inc_refcount(session);
- if (pmtu != 0)
- session->mtu = session->mru = pmtu -
- PPPOL2TP_HEADER_OVERHEAD;
- dst_release(dst);
+ mutex_lock(&ps->sk_lock);
+ error = l2tp_session_register(session, tunnel);
+ if (error < 0) {
+ mutex_unlock(&ps->sk_lock);
+ kfree(session);
+ goto end;
+ }
+ drop_refcnt = true;
}
/* Special case: if source & dest session_id == 0x0000, this
@@ -768,12 +816,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
po->chan.mtu = session->mtu;
error = ppp_register_net_channel(sock_net(sk), &po->chan);
- if (error)
+ if (error) {
+ mutex_unlock(&ps->sk_lock);
goto end;
+ }
out_no_ppp:
/* This is how we get the session context from the socket. */
sk->sk_user_data = session;
+ rcu_assign_pointer(ps->sk, sk);
+ mutex_unlock(&ps->sk_lock);
+
+ /* Keep the reference we've grabbed on the session: sk doesn't expect
+ * the session to disappear. pppol2tp_session_destruct() is responsible
+ * for dropping it.
+ */
+ drop_refcnt = false;
+
sk->sk_state = PPPOX_CONNECTED;
l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
session->name);
@@ -781,6 +840,8 @@ out_no_ppp:
end:
if (drop_refcnt)
l2tp_session_dec_refcount(session);
+ if (drop_tunnel)
+ l2tp_tunnel_dec_refcount(tunnel);
release_sock(sk);
return error;
@@ -795,12 +856,11 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
{
int error;
struct l2tp_session *session;
- struct pppol2tp_session *ps;
/* Error if tunnel socket is not prepped */
if (!tunnel->sock) {
error = -ENOENT;
- goto out;
+ goto err;
}
/* Default MTU values. */
@@ -815,18 +875,20 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
peer_session_id, cfg);
if (IS_ERR(session)) {
error = PTR_ERR(session);
- goto out;
+ goto err;
}
- ps = l2tp_session_priv(session);
- ps->tunnel_sock = tunnel->sock;
+ pppol2tp_session_init(session);
- l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n",
- session->name);
+ error = l2tp_session_register(session, tunnel);
+ if (error < 0)
+ goto err_sess;
- error = 0;
+ return 0;
-out:
+err_sess:
+ kfree(session);
+err:
return error;
}
@@ -987,8 +1049,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
"%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
session->name, cmd, arg);
- sk = ps->sock;
- sock_hold(sk);
+ sk = pppol2tp_session_get_sock(session);
+ if (!sk)
+ return -EBADR;
switch (cmd) {
case SIOCGIFMTU:
@@ -1135,13 +1198,11 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
/* resend to session ioctl handler */
struct l2tp_session *session =
l2tp_session_get(sock_net(sk), tunnel,
- stats.session_id, true);
+ stats.session_id);
if (session) {
err = pppol2tp_session_ioctl(session, cmd,
arg);
- if (session->deref)
- session->deref(session);
l2tp_session_dec_refcount(session);
} else {
err = -EBADR;
@@ -1265,7 +1326,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
int optname, int val)
{
int err = 0;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
switch (optname) {
case PPPOL2TP_SO_RECVSEQ:
@@ -1286,8 +1346,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
}
session->send_seq = !!val;
{
- struct sock *ssk = ps->sock;
- struct pppox_sock *po = pppox_sk(ssk);
+ struct pppox_sock *po = pppox_sk(sk);
+
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
@@ -1558,7 +1618,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
pd->session_idx++;
if (pd->session == NULL) {
@@ -1626,8 +1686,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
{
struct l2tp_session *session = v;
struct l2tp_tunnel *tunnel = session->tunnel;
- struct pppol2tp_session *ps = l2tp_session_priv(session);
- struct pppox_sock *po = pppox_sk(ps->sock);
+ unsigned char state;
+ char user_data_ok;
+ struct sock *sk;
u32 ip = 0;
u16 port = 0;
@@ -1637,6 +1698,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
port = ntohs(inet->inet_sport);
}
+ sk = pppol2tp_session_get_sock(session);
+ if (sk) {
+ state = sk->sk_state;
+ user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N';
+ } else {
+ state = 0;
+ user_data_ok = 'N';
+ }
+
seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
"%04X/%04X %d %c\n",
session->name, ip, port,
@@ -1644,9 +1714,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
session->session_id,
tunnel->peer_tunnel_id,
session->peer_session_id,
- ps->sock->sk_state,
- (session == ps->sock->sk_user_data) ?
- 'Y' : 'N');
+ state, user_data_ok);
seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n",
session->mtu, session->mru,
session->recv_seq ? 'R' : '-',
@@ -1663,8 +1731,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
atomic_long_read(&session->stats.rx_bytes),
atomic_long_read(&session->stats.rx_errors));
- if (po)
+ if (sk) {
+ struct pppox_sock *po = pppox_sk(sk);
+
seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
+ sock_put(sk);
+ }
}
static int pppol2tp_seq_show(struct seq_file *m, void *v)
@@ -1689,8 +1761,6 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
pppol2tp_seq_tunnel_show(m, pd->tunnel);
} else {
pppol2tp_seq_session_show(m, pd->session);
- if (pd->session->deref)
- pd->session->deref(pd->session);
l2tp_session_dec_refcount(pd->session);
}
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index e15314e..db6e0af 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void)
skb_queue_head_init(&lapb->write_queue);
skb_queue_head_init(&lapb->ack_queue);
- init_timer(&lapb->t1timer);
- init_timer(&lapb->t2timer);
+ timer_setup(&lapb->t1timer, NULL, 0);
+ timer_setup(&lapb->t2timer, NULL, 0);
lapb->t1 = LAPB_DEFAULT_T1;
lapb->t2 = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 1a5535b..8bb469c 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -35,15 +35,14 @@
#include <linux/interrupt.h>
#include <net/lapb.h>
-static void lapb_t1timer_expiry(unsigned long);
-static void lapb_t2timer_expiry(unsigned long);
+static void lapb_t1timer_expiry(struct timer_list *);
+static void lapb_t2timer_expiry(struct timer_list *);
void lapb_start_t1timer(struct lapb_cb *lapb)
{
del_timer(&lapb->t1timer);
- lapb->t1timer.data = (unsigned long)lapb;
- lapb->t1timer.function = &lapb_t1timer_expiry;
+ lapb->t1timer.function = (TIMER_FUNC_TYPE)lapb_t1timer_expiry;
lapb->t1timer.expires = jiffies + lapb->t1;
add_timer(&lapb->t1timer);
@@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
{
del_timer(&lapb->t2timer);
- lapb->t2timer.data = (unsigned long)lapb;
- lapb->t2timer.function = &lapb_t2timer_expiry;
+ lapb->t2timer.function = (TIMER_FUNC_TYPE)lapb_t2timer_expiry;
lapb->t2timer.expires = jiffies + lapb->t2;
add_timer(&lapb->t2timer);
@@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb)
return timer_pending(&lapb->t1timer);
}
-static void lapb_t2timer_expiry(unsigned long param)
+static void lapb_t2timer_expiry(struct timer_list *t)
{
- struct lapb_cb *lapb = (struct lapb_cb *)param;
+ struct lapb_cb *lapb = from_timer(lapb, t, t2timer);
if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
@@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param)
}
}
-static void lapb_t1timer_expiry(unsigned long param)
+static void lapb_t1timer_expiry(struct timer_list *t)
{
- struct lapb_cb *lapb = (struct lapb_cb *)param;
+ struct lapb_cb *lapb = from_timer(lapb, t, t1timer);
switch (lapb->state) {
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index ea225bd..f596480 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -1318,9 +1318,8 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
+static void llc_conn_tmr_common_cb(struct sock *sk, u8 type)
{
- struct sock *sk = (struct sock *)timeout_data;
struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
bh_lock_sock(sk);
@@ -1334,24 +1333,32 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type)
bh_unlock_sock(sk);
}
-void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data)
+void llc_conn_pf_cycle_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR);
+ struct llc_sock *llc = from_timer(llc, t, pf_cycle_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_P_TMR);
}
-void llc_conn_busy_tmr_cb(unsigned long timeout_data)
+void llc_conn_busy_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR);
+ struct llc_sock *llc = from_timer(llc, t, busy_state_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_BUSY_TMR);
}
-void llc_conn_ack_tmr_cb(unsigned long timeout_data)
+void llc_conn_ack_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR);
+ struct llc_sock *llc = from_timer(llc, t, ack_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_ACK_TMR);
}
-void llc_conn_rej_tmr_cb(unsigned long timeout_data)
+void llc_conn_rej_tmr_cb(struct timer_list *t)
{
- llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR);
+ struct llc_sock *llc = from_timer(llc, t, rej_sent_timer.timer);
+
+ llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_REJ_TMR);
}
int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5e91b47..9177dbb 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -902,20 +902,16 @@ static void llc_sk_init(struct sock *sk)
llc->inc_cntr = llc->dec_cntr = 2;
llc->dec_step = llc->connect_step = 1;
- setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 0);
llc->ack_timer.expire = sysctl_llc2_ack_timeout;
- setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 0);
llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout;
- setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 0);
llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout;
- setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb,
- (unsigned long)sk);
+ timer_setup(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 0);
llc->busy_state_timer.expire = sysctl_llc2_busy_timeout;
llc->n2 = 2; /* max retransmit */
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index 799bafc..8443a6d 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sysctl_net_llc.c: sysctl interface to LLC net subsystem.
*
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 2829122..e3589ad 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_MAC80211) += mac80211.o
# mac80211 objects
@@ -6,6 +7,7 @@ mac80211-y := \
driver-ops.o \
sta_info.o \
wep.o \
+ aead_api.o \
wpa.o \
scan.o offchannel.o \
ht.o agg-tx.o agg-rx.o \
@@ -15,8 +17,6 @@ mac80211-y := \
rate.o \
michael.o \
tkip.o \
- aes_ccm.o \
- aes_gcm.o \
aes_cmac.o \
aes_gmac.o \
fils_aead.o \
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c
index a4e0d59..160f9df 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aead_api.c
@@ -1,6 +1,7 @@
/*
* Copyright 2003-2004, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2014-2015, Qualcomm Atheros, Inc.
*
* Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
@@ -12,30 +13,29 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/err.h>
+#include <linux/scatterlist.h>
#include <crypto/aead.h>
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_ccm.h"
+#include "aead_api.h"
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len)
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+ u8 *data, size_t data_len, u8 *mic)
{
+ size_t mic_len = crypto_aead_authsize(tfm);
struct scatterlist sg[3];
struct aead_request *aead_req;
int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
u8 *__aad;
- aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
if (!aead_req)
return -ENOMEM;
__aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, CCM_AAD_LEN);
+ memcpy(__aad, aad, aad_len);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+ sg_set_buf(&sg[0], __aad, aad_len);
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
return 0;
}
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len)
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+ u8 *data, size_t data_len, u8 *mic)
{
+ size_t mic_len = crypto_aead_authsize(tfm);
struct scatterlist sg[3];
struct aead_request *aead_req;
int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
@@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
if (data_len == 0)
return -EINVAL;
- aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
if (!aead_req)
return -ENOMEM;
__aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, CCM_AAD_LEN);
+ memcpy(__aad, aad, aad_len);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+ sg_set_buf(&sg[0], __aad, aad_len);
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
return err;
}
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
- size_t key_len,
- size_t mic_len)
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+ size_t key_len, size_t mic_len)
{
struct crypto_aead *tfm;
int err;
- tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm))
return tfm;
@@ -109,7 +109,7 @@ free_aead:
return ERR_PTR(err);
}
-void ieee80211_aes_key_free(struct crypto_aead *tfm)
+void aead_key_free(struct crypto_aead *tfm)
{
crypto_free_aead(tfm);
}
diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h
new file mode 100644
index 0000000..5e39ea8
--- /dev/null
+++ b/net/mac80211/aead_api.h
@@ -0,0 +1,27 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _AEAD_API_H
+#define _AEAD_API_H
+
+#include <crypto/aead.h>
+#include <linux/crypto.h>
+
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+ size_t key_len, size_t mic_len);
+
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ size_t aad_len, u8 *data,
+ size_t data_len, u8 *mic);
+
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ size_t aad_len, u8 *data,
+ size_t data_len, u8 *mic);
+
+void aead_key_free(struct crypto_aead *tfm);
+
+#endif /* _AEAD_API_H */
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index fcd3254..e9b7ca0 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -10,19 +10,39 @@
#ifndef AES_CCM_H
#define AES_CCM_H
-#include <linux/crypto.h>
+#include "aead_api.h"
#define CCM_AAD_LEN 32
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
- size_t key_len,
- size_t mic_len);
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len);
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len);
-void ieee80211_aes_key_free(struct crypto_aead *tfm);
+static inline struct crypto_aead *
+ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len)
+{
+ return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len);
+}
+
+static inline int
+ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm,
+ u8 *b_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_encrypt(tfm, b_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline int
+ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm,
+ u8 *b_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_decrypt(tfm, b_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline void ieee80211_aes_key_free(struct crypto_aead *tfm)
+{
+ return aead_key_free(tfm);
+}
#endif /* AES_CCM_H */
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
deleted file mode 100644
index 8a4397c..0000000
--- a/net/mac80211/aes_gcm.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright 2014-2015, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/err.h>
-#include <crypto/aead.h>
-
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_gcm.h"
-
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic)
-{
- struct scatterlist sg[3];
- struct aead_request *aead_req;
- int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
- u8 *__aad;
-
- aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
- if (!aead_req)
- return -ENOMEM;
-
- __aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, GCM_AAD_LEN);
-
- sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
- sg_set_buf(&sg[1], data, data_len);
- sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
- aead_request_set_tfm(aead_req, tfm);
- aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
- aead_request_set_ad(aead_req, sg[0].length);
-
- crypto_aead_encrypt(aead_req);
- kzfree(aead_req);
- return 0;
-}
-
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic)
-{
- struct scatterlist sg[3];
- struct aead_request *aead_req;
- int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
- u8 *__aad;
- int err;
-
- if (data_len == 0)
- return -EINVAL;
-
- aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
- if (!aead_req)
- return -ENOMEM;
-
- __aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, GCM_AAD_LEN);
-
- sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
- sg_set_buf(&sg[1], data, data_len);
- sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
- aead_request_set_tfm(aead_req, tfm);
- aead_request_set_crypt(aead_req, sg, sg,
- data_len + IEEE80211_GCMP_MIC_LEN, j_0);
- aead_request_set_ad(aead_req, sg[0].length);
-
- err = crypto_aead_decrypt(aead_req);
- kzfree(aead_req);
-
- return err;
-}
-
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
- size_t key_len)
-{
- struct crypto_aead *tfm;
- int err;
-
- tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- return tfm;
-
- err = crypto_aead_setkey(tfm, key, key_len);
- if (err)
- goto free_aead;
- err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN);
- if (err)
- goto free_aead;
-
- return tfm;
-
-free_aead:
- crypto_free_aead(tfm);
- return ERR_PTR(err);
-}
-
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
-{
- crypto_free_aead(tfm);
-}
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index 55aed53..d2b0960 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -9,16 +9,38 @@
#ifndef AES_GCM_H
#define AES_GCM_H
-#include <linux/crypto.h>
+#include "aead_api.h"
#define GCM_AAD_LEN 32
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic);
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic);
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
- size_t key_len);
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm);
+static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm,
+ u8 *j_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_encrypt(tfm, j_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm,
+ u8 *j_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_decrypt(tfm, j_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline struct crypto_aead *
+ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len)
+{
+ return aead_key_setup_encrypt("gcm(aes)", key,
+ key_len, IEEE80211_GCMP_MIC_LEN);
+}
+
+static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
+{
+ return aead_key_free(tfm);
+}
#endif /* AES_GCM_H */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 2849a1f..88cc1ae 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -459,7 +459,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
}
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
- const u8 *addr, unsigned int bit)
+ const u8 *addr, unsigned int tid)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
@@ -470,7 +470,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
if (!sta)
goto unlock;
- set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl);
+ set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl);
ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
unlock:
rcu_read_unlock();
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a354f19..fb15d3b9 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
if (!ieee80211_sdata_running(sdata))
return -ENETDOWN;
- if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
- ret = drv_set_bitrate_mask(local, sdata, mask);
- if (ret)
- return ret;
- }
-
/*
* If active validate the setting and reject it if it doesn't leave
* at least one basic rate usable, since we really have to be able
@@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
return -EINVAL;
}
+ if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+ ret = drv_set_bitrate_mask(local, sdata, mask);
+ if (ret)
+ return ret;
+ }
+
for (i = 0; i < NUM_NL80211_BANDS; i++) {
struct ieee80211_supported_band *sband = wiphy->bands[i];
int j;
diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h
index 1956b31..d90a8f9 100644
--- a/net/mac80211/debug.h
+++ b/net/mac80211/debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUG_H
#define __MAC80211_DEBUG_H
#include <net/cfg80211.h>
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 60c35af..d2c4247 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUGFS_H
#define __MAC80211_DEBUGFS_H
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index 32adc77..1cd7b8b 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUGFS_KEY_H
#define __MAC80211_DEBUGFS_KEY_H
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index 9f5501a..a7e9d8d 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* routines exported for debugfs handling */
#ifndef __IEEE80211_DEBUGFS_NETDEV_H
diff --git a/net/mac80211/debugfs_sta.h b/net/mac80211/debugfs_sta.h
index 8b60890..d2e7c27 100644
--- a/net/mac80211/debugfs_sta.h
+++ b/net/mac80211/debugfs_sta.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC80211_DEBUGFS_STA_H
#define __MAC80211_DEBUGFS_STA_H
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 09f77e4..c7f93fd 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d6d0b42..41f5e48 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -290,13 +290,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
{
int i;
+ mutex_lock(&sta->ampdu_mlme.mtx);
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
- __ieee80211_stop_tx_ba_session(sta, i, reason);
- __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
- WLAN_REASON_QSTA_LEAVE_QBSS,
- reason != AGG_STOP_DESTROY_STA &&
- reason != AGG_STOP_PEER_REQUEST);
+ ___ieee80211_stop_tx_ba_session(sta, i, reason);
+ ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
+ WLAN_REASON_QSTA_LEAVE_QBSS,
+ reason != AGG_STOP_DESTROY_STA &&
+ reason != AGG_STOP_PEER_REQUEST);
}
+ mutex_unlock(&sta->ampdu_mlme.mtx);
/* stopping might queue the work again - so cancel only afterwards */
cancel_work_sync(&sta->ampdu_mlme.work);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9675814..68f874e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2009,6 +2009,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct txq_info *txq, int tid);
void ieee80211_txq_purge(struct ieee80211_local *local,
struct txq_info *txqi);
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
const u8 *extra, size_t extra_len, const u8 *bssid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f75029a..13b16f9 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -793,9 +793,7 @@ static int ieee80211_open(struct net_device *dev)
static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
bool going_down)
{
- struct ieee80211_sub_if_data *txq_sdata = sdata;
struct ieee80211_local *local = sdata->local;
- struct fq *fq = &local->fq;
unsigned long flags;
struct sk_buff *skb, *tmp;
u32 hw_reconf_flags = 0;
@@ -939,9 +937,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
- txq_sdata = container_of(sdata->bss,
- struct ieee80211_sub_if_data, u.ap);
-
mutex_lock(&local->mtx);
list_del(&sdata->u.vlan.list);
mutex_unlock(&local->mtx);
@@ -998,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
skb_queue_purge(&sdata->skb_queue);
}
- sdata->bss = NULL;
-
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -1012,22 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
- if (txq_sdata->vif.txq) {
- struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq);
-
- /*
- * FIXME FIXME
- *
- * We really shouldn't purge the *entire* txqi since that
- * contains frames for the other AP_VLANs (and possibly
- * the AP itself) as well, but there's no API in FQ now
- * to be able to filter.
- */
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ ieee80211_txq_remove_vlan(local, sdata);
- spin_lock_bh(&fq->lock);
- ieee80211_txq_purge(local, txqi);
- spin_unlock_bh(&fq->lock);
- }
+ sdata->bss = NULL;
if (local->open_count == 0)
ieee80211_clear_tx_pending(local);
@@ -1772,7 +1753,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sizeof(void *));
int txq_size = 0;
- if (local->ops->wake_tx_queue)
+ if (local->ops->wake_tx_queue &&
+ type != NL80211_IFTYPE_AP_VLAN &&
+ type != NL80211_IFTYPE_MONITOR)
txq_size += sizeof(struct txq_info) +
local->hw.txq_data_size;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index a98fc2b..9380493 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2015 Intel Deutschland GmbH
+ * Copyright 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
+#include <crypto/algapi.h>
#include <asm/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -609,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
ieee80211_key_free_common(key);
}
+static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
+{
+ u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
+ u8 *tk_old, *tk_new;
+
+ if (!old || new->conf.keylen != old->conf.keylen)
+ return false;
+
+ tk_old = old->conf.key;
+ tk_new = new->conf.key;
+
+ /*
+ * In station mode, don't compare the TX MIC key, as it's never used
+ * and offloaded rekeying may not care to send it to the host. This
+ * is the case in iwlwifi, for example.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
+ new->conf.keylen == WLAN_KEY_LEN_TKIP &&
+ !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
+ memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
+ memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ tk_old = tkip_old;
+ tk_new = tkip_new;
+ }
+
+ return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
+}
+
int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
@@ -620,9 +654,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
idx = key->conf.keyidx;
- key->local = sdata->local;
- key->sdata = sdata;
- key->sta = sta;
mutex_lock(&sdata->local->key_mtx);
@@ -633,6 +664,20 @@ int ieee80211_key_link(struct ieee80211_key *key,
else
old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ /*
+ * Silently accept key re-installation without really installing the
+ * new version of the key to avoid nonce reuse or replay issues.
+ */
+ if (ieee80211_key_identical(sdata, old_key, key)) {
+ ieee80211_key_free_unused(key);
+ ret = 0;
+ goto out;
+ }
+
+ key->local = sdata->local;
+ key->sdata = sdata;
+ key->sta = sta;
+
increment_tailroom_need_count(sdata);
ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
@@ -648,6 +693,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
ret = 0;
}
+ out:
mutex_unlock(&sdata->local->key_mtx);
return ret;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a550c70..7a76c4a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -675,8 +675,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
enum nl80211_band band;
u8 *pos;
struct ieee80211_sub_if_data *sdata;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) +
- sizeof(mgmt->u.beacon);
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
rcu_read_lock();
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7e5f271..465b785 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr, struct ieee802_11_elems *ie);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
+void mesh_plink_timer(struct timer_list *t);
void mesh_plink_broken(struct sta_info *sta);
u32 mesh_plink_deactivate(struct sta_info *sta);
u32 mesh_plink_open(struct sta_info *sta);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d8bbd0d..146ec6c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
u8 *pos, ie_len;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
- sizeof(mgmt->u.action.u.mesh_action);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.mesh_action);
skb = dev_alloc_skb(local->tx_headroom +
hdr_len +
@@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_mgmt *mgmt;
u8 *pos, ie_len;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
- sizeof(mgmt->u.action.u.mesh_action);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.mesh_action);
if (time_before(jiffies, ifmsh->next_perr))
return -EAGAIN;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f69c6c3..e2d00cc 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
bool include_plid = false;
u16 peering_proto = 0;
u8 *pos, ie_len = 4;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) +
- sizeof(mgmt->u.action.u.self_prot);
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
int err = -ENOMEM;
skb = dev_alloc_skb(local->tx_headroom +
@@ -604,8 +603,9 @@ out:
ieee80211_mbss_info_change_notify(sdata, changed);
}
-static void mesh_plink_timer(unsigned long data)
+void mesh_plink_timer(struct timer_list *t)
{
+ struct mesh_sta *mesh = from_timer(mesh, t, plink_timer);
struct sta_info *sta;
u16 reason = 0;
struct ieee80211_sub_if_data *sdata;
@@ -617,7 +617,7 @@ static void mesh_plink_timer(unsigned long data)
* del_timer_sync() this timer after having made sure
* it cannot be readded (by deleting the plink.)
*/
- sta = (struct sta_info *) data;
+ sta = mesh->plink_sta;
if (sta->sdata->local->quiescing)
return;
@@ -697,11 +697,8 @@ static void mesh_plink_timer(unsigned long data)
static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout)
{
- sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
- sta->mesh->plink_timer.data = (unsigned long) sta;
- sta->mesh->plink_timer.function = mesh_plink_timer;
sta->mesh->plink_timeout = timeout;
- add_timer(&sta->mesh->plink_timer);
+ mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout));
}
static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3b8e270..e4ededa 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,7 +145,6 @@ static u32
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
- const struct ieee80211_ht_cap *ht_cap,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
struct cfg80211_chan_def *chandef, bool tracking)
@@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
chandef->center_freq1 = channel->center_freq;
chandef->center_freq2 = 0;
- if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) {
+ if (!ht_oper || !sta_ht_cap.ht_supported) {
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
goto out;
}
chandef->width = NL80211_CHAN_WIDTH_20;
- if (!(ht_cap->cap_info &
- cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
- ret = IEEE80211_STA_DISABLE_40MHZ;
- vht_chandef = *chandef;
- goto out;
- }
-
ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
channel->band);
/* check that channel matches the right operating channel */
@@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
/* calculate new channel (type) based on HT/VHT operation IEs */
flags = ieee80211_determine_chantype(sdata, sband, chan,
- ht_cap, ht_oper, vht_oper,
+ ht_oper, vht_oper,
&chandef, true);
/*
@@ -780,11 +772,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
WLAN_EID_HT_CAPABILITY,
WLAN_EID_BSS_COEX_2040,
+ /* luckily this is almost always there */
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_QOS_TRAFFIC_CAPA,
WLAN_EID_TIM_BCAST_REQ,
WLAN_EID_INTERWORKING,
- /* 60GHz doesn't happen right now */
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
WLAN_EID_VHT_CAPABILITY,
WLAN_EID_OPMODE_NOTIF,
};
@@ -811,22 +804,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
/* if present, add any custom IEs that go before VHT */
if (assoc_data->ie_len) {
static const u8 before_vht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_EXT_SUPP_RATES,
- WLAN_EID_PWR_CAPABILITY,
- WLAN_EID_SUPPORTED_CHANNELS,
- WLAN_EID_RSN,
- WLAN_EID_QOS_CAPA,
- WLAN_EID_RRM_ENABLED_CAPABILITIES,
- WLAN_EID_MOBILITY_DOMAIN,
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- WLAN_EID_HT_CAPABILITY,
+ /*
+ * no need to list the ones split off before HT
+ * or generated here
+ */
WLAN_EID_BSS_COEX_2040,
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_QOS_TRAFFIC_CAPA,
WLAN_EID_TIM_BCAST_REQ,
WLAN_EID_INTERWORKING,
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
};
/* RIC already taken above, so no need to handle here anymore */
@@ -4317,7 +4304,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
cbss->channel,
- ht_cap, ht_oper, vht_oper,
+ ht_oper, vht_oper,
&chandef, false);
sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index a87d195..38c45e1 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <net/mac80211.h>
#include <net/rtnetlink.h>
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 47d2ed5..ef2beca 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,7 +7,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright 2016 Intel Deutschland GmbH
+ * Copyright 2016-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
return bss;
}
+static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ u32 scan_flags, const u8 *da)
+{
+ if (!sdata)
+ return false;
+ /* accept broadcast for OCE */
+ if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
+ is_broadcast_ether_addr(da))
+ return true;
+ if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
+ return true;
+ return ether_addr_equal(da, sdata->vif.addr);
+}
+
void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
{
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
@@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
+ u32 scan_req_flags = 0, sched_scan_req_flags = 0;
scan_req = rcu_dereference(local->scan_req);
sched_scan_req = rcu_dereference(local->sched_scan_req);
- /* ignore ProbeResp to foreign address unless scanning
- * with randomised address
+ if (scan_req)
+ scan_req_flags = scan_req->flags;
+
+ if (sched_scan_req)
+ sched_scan_req_flags = sched_scan_req->flags;
+
+ /* ignore ProbeResp to foreign address or non-bcast (OCE)
+ * unless scanning with randomised address
*/
- if (!(sdata1 &&
- (ether_addr_equal(mgmt->da, sdata1->vif.addr) ||
- scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) &&
- !(sdata2 &&
- (ether_addr_equal(mgmt->da, sdata2->vif.addr) ||
- sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)))
+ if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ mgmt->da) &&
+ !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ mgmt->da))
return;
elements = mgmt->u.probe_resp.variable;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6961501..9673e15 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
if (!sta->mesh)
goto free;
+ sta->mesh->plink_sta = sta;
spin_lock_init(&sta->mesh->plink_lock);
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.user_mpm)
- init_timer(&sta->mesh->plink_timer);
+ timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
+ 0);
sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
}
#endif
@@ -515,6 +517,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local,
return err;
}
+static void
+ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ bool allow_p2p_go_ps = sdata->vif.p2p;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata ||
+ !test_sta_flag(sta, WLAN_STA_ASSOC))
+ continue;
+ if (!sta->sta.support_p2p_ps) {
+ allow_p2p_go_ps = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
+ sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+ }
+}
+
/*
* should be called with sta_mtx locked
* this function replaces the mutex lock
@@ -561,6 +588,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
goto out_remove;
set_sta_flag(sta, WLAN_STA_INSERTED);
+
+ if (sta->sta_state >= IEEE80211_STA_ASSOC) {
+ ieee80211_recalc_min_chandef(sta->sdata);
+ if (!sta->sta.support_p2p_ps)
+ ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+ }
+
/* accept BA sessions now */
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
@@ -1788,31 +1822,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
}
EXPORT_SYMBOL(ieee80211_sta_set_buffered);
-static void
-ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_local *local = sdata->local;
- bool allow_p2p_go_ps = sdata->vif.p2p;
- struct sta_info *sta;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
- if (sdata != sta->sdata ||
- !test_sta_flag(sta, WLAN_STA_ASSOC))
- continue;
- if (!sta->sta.support_p2p_ps) {
- allow_p2p_go_ps = false;
- break;
- }
- }
- rcu_read_unlock();
-
- if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
- sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
- }
-}
-
int sta_info_move_state(struct sta_info *sta,
enum ieee80211_sta_state new_state)
{
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 3acbdfa..5c54acd 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -344,6 +344,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
* @plink_state: peer link state
* @plink_timeout: timeout of peer link
* @plink_timer: peer link watch timer
+ * @plink_sta: peer link watch timer's sta_info
* @t_offset: timing offset relative to this host
* @t_offset_setpoint: reference timing offset of this sta to be used when
* calculating clockdrift
@@ -356,6 +357,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
*/
struct mesh_sta {
struct timer_list plink_timer;
+ struct sta_info *plink_sta;
s64 t_offset;
s64 t_offset_setpoint;
@@ -398,7 +400,7 @@ struct ieee80211_sta_rx_stats {
u64 msdu[IEEE80211_NUM_TIDS + 1];
};
-/**
+/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
* value will be scaled by the number of active stations when it is being
diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c
index edfe0c1..8378572 100644
--- a/net/mac80211/trace.c
+++ b/net/mac80211/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* bug in tracepoint.h, it should include this */
#include <linux/module.h>
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d9ac17..591ad02 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h
index 768f7c2..366b9e6 100644
--- a/net/mac80211/trace_msg.h
+++ b/net/mac80211/trace_msg.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifdef CONFIG_MAC80211_MESSAGE_TRACING
#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9482668..7b81544 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1396,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
fq_flow_get_default_func);
}
+static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
+ struct fq_flow *flow, struct sk_buff *skb,
+ void *data)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+ return info->control.vif == data;
+}
+
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct fq *fq = &local->fq;
+ struct txq_info *txqi;
+ struct fq_tin *tin;
+ struct ieee80211_sub_if_data *ap;
+
+ if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
+ return;
+
+ ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
+
+ if (!ap->vif.txq)
+ return;
+
+ txqi = to_txq_info(ap->vif.txq);
+ tin = &txqi->tin;
+
+ spin_lock_bh(&fq->lock);
+ fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif,
+ fq_skb_free_func);
+ spin_unlock_bh(&fq->lock);
+}
+
void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
struct txq_info *txqi, int tid)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6aef679..d57e5f6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
/* insert custom IEs that go before HT */
if (ie && ie_len) {
static const u8 before_ht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_REQUEST,
- WLAN_EID_EXT_SUPP_RATES,
+ /*
+ * no need to list the ones split off already
+ * (or generated here)
+ */
WLAN_EID_DS_PARAMS,
WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
};
@@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
/* insert custom IEs that go before VHT */
if (ie && ie_len) {
static const u8 before_vht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_REQUEST,
- WLAN_EID_EXT_SUPP_RATES,
- WLAN_EID_DS_PARAMS,
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- WLAN_EID_HT_CAPABILITY,
+ /*
+ * no need to list the ones split off already
+ * (or generated here)
+ */
WLAN_EID_BSS_COEX_2040,
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_SSID_LIST,
WLAN_EID_CHANNEL_USAGE,
WLAN_EID_INTERWORKING,
WLAN_EID_MESH_ID,
- /* 60 GHz can't happen here right now */
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
};
noffset = ieee80211_ie_split(ie, ie_len,
before_vht, ARRAY_SIZE(before_vht),
@@ -2980,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
struct ieee80211_local *local = sdata->local;
int freq;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) +
- sizeof(mgmt->u.action.u.chan_switch);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.chan_switch);
u8 *pos;
if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 19ec218..b9276ac 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
bw = ieee80211_sta_cap_rx_bw(sta);
bw = min(bw, sta->cur_max_bandwidth);
+
+ /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
+ * IEEE80211-2016 specification makes higher bandwidth operation
+ * possible on the TDLS link if the peers have wider bandwidth
+ * capability.
+ */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
+ return bw;
+
bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
return bw;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 0d722ea..b58722d 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
pos += IEEE80211_CCMP_HDR_LEN;
ccmp_special_blocks(skb, pn, b_0, aad);
return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
- skb_put(skb, mic_len), mic_len);
+ skb_put(skb, mic_len));
}
@@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
key->u.ccmp.tfm, b_0, aad,
skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
data_len,
- skb->data + skb->len - mic_len, mic_len))
+ skb->data + skb->len - mic_len))
return RX_DROP_UNUSABLE;
}
diff --git a/net/mac802154/cfg.h b/net/mac802154/cfg.h
index e2718f9..3bb0896 100644
--- a/net/mac802154/cfg.h
+++ b/net/mac802154/cfg.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* mac802154 configuration hooks for cfg802154
*/
diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h
index fd9daf2..d23f0db 100644
--- a/net/mac802154/driver-ops.h
+++ b/net/mac802154/driver-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MAC802154_DRIVER_OPS
#define __MAC802154_DRIVER_OPS
diff --git a/net/mac802154/trace.c b/net/mac802154/trace.c
index 863e5e6..c36e3d5 100644
--- a/net/mac802154/trace.c
+++ b/net/mac802154/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#ifndef __CHECKER__
diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h
index 6f30e0c..2c8a43d 100644
--- a/net/mac802154/trace.h
+++ b/net/mac802154/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* Based on net/mac80211/trace.h */
#undef TRACE_SYSTEM
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef..801ea90 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -24,6 +24,7 @@ config NET_MPLS_GSO
config MPLS_ROUTING
tristate "MPLS: routing support"
+ depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
---help---
Add support for forwarding of mpls packets.
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c5b9ce4..8ca9915 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -16,6 +16,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/netevent.h>
+#include <net/ip_tunnels.h>
#include <net/netns/generic.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -39,6 +40,36 @@ static int one = 1;
static int label_limit = (1 << 20) - 1;
static int ttl_max = 255;
+#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
+static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
+{
+ return sizeof(struct mpls_shim_hdr);
+}
+
+static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
+ .encap_hlen = ipgre_mpls_encap_hlen,
+};
+
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+ return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+ ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+#else
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+ return 0;
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+}
+#endif
+
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
struct nlmsghdr *nlh, struct net *net, u32 portid,
unsigned int nlm_flags);
@@ -2485,6 +2516,10 @@ static int __init mpls_init(void)
0);
rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
mpls_netconf_dump_devconf, 0);
+ err = ipgre_tunnel_encap_add_mpls_ops();
+ if (err)
+ pr_err("Can't add mpls over gre tunnel ops\n");
+
err = 0;
out:
return err;
@@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void)
dev_remove_pack(&mpls_packet_type);
unregister_netdevice_notifier(&mpls_dev_notifier);
unregister_pernet_subsys(&mpls_net_ops);
+ ipgre_tunnel_encap_del_mpls_ops();
}
module_exit(mpls_exit);
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index cf65aec..768a302 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MPLS_INTERNAL_H
#define MPLS_INTERNAL_H
#include <net/mpls.h>
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index af3d636..d30f7bd 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -286,6 +286,7 @@ struct ncsi_dev_priv {
struct work_struct work; /* For channel management */
struct packet_type ptype; /* NCSI packet Rx handler */
struct list_head node; /* Form NCSI device list */
+#define NCSI_MAX_VLAN_VIDS 15
struct list_head vlan_vids; /* List of active VLAN IDs */
};
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 6898e72..f135938 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -187,7 +187,7 @@ static struct ncsi_aen_handler {
} ncsi_aen_handlers[] = {
{ NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc },
{ NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr },
- { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc }
+ { NCSI_PKT_AEN_HNCDSC, 8, ncsi_aen_handler_hncdsc }
};
int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb)
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 3fd3c39..47baf91 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -38,7 +38,7 @@ static inline int ncsi_filter_size(int table)
return sizes[table];
}
-u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
+static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index)
{
struct ncsi_channel_filter *ncf;
int size;
@@ -189,6 +189,7 @@ static void ncsi_channel_monitor(unsigned long data)
struct ncsi_channel *nc = (struct ncsi_channel *)data;
struct ncsi_package *np = nc->package;
struct ncsi_dev_priv *ndp = np->ndp;
+ struct ncsi_channel_mode *ncm;
struct ncsi_cmd_arg nca;
bool enabled, chained;
unsigned int monitor_state;
@@ -202,11 +203,15 @@ static void ncsi_channel_monitor(unsigned long data)
monitor_state = nc->monitor.state;
spin_unlock_irqrestore(&nc->lock, flags);
- if (!enabled || chained)
+ if (!enabled || chained) {
+ ncsi_stop_channel_monitor(nc);
return;
+ }
if (state != NCSI_CHANNEL_INACTIVE &&
- state != NCSI_CHANNEL_ACTIVE)
+ state != NCSI_CHANNEL_ACTIVE) {
+ ncsi_stop_channel_monitor(nc);
return;
+ }
switch (monitor_state) {
case NCSI_CHANNEL_MONITOR_START:
@@ -217,28 +222,28 @@ static void ncsi_channel_monitor(unsigned long data)
nca.type = NCSI_PKT_CMD_GLS;
nca.req_flags = 0;
ret = ncsi_xmit_cmd(&nca);
- if (ret) {
+ if (ret)
netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
ret);
- return;
- }
-
break;
case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
break;
default:
- if (!(ndp->flags & NCSI_DEV_HWA) &&
- state == NCSI_CHANNEL_ACTIVE) {
+ if (!(ndp->flags & NCSI_DEV_HWA)) {
ncsi_report_link(ndp, true);
ndp->flags |= NCSI_DEV_RESHUFFLE;
}
+ ncsi_stop_channel_monitor(nc);
+
+ ncm = &nc->modes[NCSI_MODE_LINK];
spin_lock_irqsave(&nc->lock, flags);
nc->state = NCSI_CHANNEL_INVISIBLE;
+ ncm->data[2] &= ~0x1;
spin_unlock_irqrestore(&nc->lock, flags);
spin_lock_irqsave(&ndp->lock, flags);
- nc->state = NCSI_CHANNEL_INACTIVE;
+ nc->state = NCSI_CHANNEL_ACTIVE;
list_add_tail_rcu(&nc->link, &ndp->channel_queue);
spin_unlock_irqrestore(&ndp->lock, flags);
ncsi_process_next_channel(ndp);
@@ -732,6 +737,10 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
if (index < 0) {
netdev_err(ndp->ndev.dev,
"Failed to add new VLAN tag, error %d\n", index);
+ if (index == -ENOSPC)
+ netdev_err(ndp->ndev.dev,
+ "Channel %u already has all VLAN filters set\n",
+ nc->id);
return -1;
}
@@ -998,12 +1007,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
struct ncsi_package *np;
struct ncsi_channel *nc;
unsigned int cap;
+ bool has_channel = false;
/* The hardware arbitration is disabled if any one channel
* doesn't support explicitly.
*/
NCSI_FOR_EACH_PACKAGE(ndp, np) {
NCSI_FOR_EACH_CHANNEL(np, nc) {
+ has_channel = true;
+
cap = nc->caps[NCSI_CAP_GENERIC].cap;
if (!(cap & NCSI_CAP_GENERIC_HWA) ||
(cap & NCSI_CAP_GENERIC_HWA_MASK) !=
@@ -1014,8 +1026,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
}
}
- ndp->flags |= NCSI_DEV_HWA;
- return true;
+ if (has_channel) {
+ ndp->flags |= NCSI_DEV_HWA;
+ return true;
+ }
+
+ ndp->flags &= ~NCSI_DEV_HWA;
+ return false;
}
static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
@@ -1403,7 +1420,6 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
{
- struct ncsi_channel_filter *ncf;
struct ncsi_dev_priv *ndp;
unsigned int n_vids = 0;
struct vlan_vid *vlan;
@@ -1420,7 +1436,6 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
}
ndp = TO_NCSI_DEV_PRIV(nd);
- ncf = ndp->hot_channel->filters[NCSI_FILTER_VLAN];
/* Add the VLAN id to our internal list */
list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
@@ -1431,12 +1446,11 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
return 0;
}
}
-
- if (n_vids >= ncf->total) {
- netdev_info(dev,
- "NCSI Channel supports up to %u VLAN tags but %u are already set\n",
- ncf->total, n_vids);
- return -EINVAL;
+ if (n_vids >= NCSI_MAX_VLAN_VIDS) {
+ netdev_warn(dev,
+ "tried to add vlan id %u but NCSI max already registered (%u)\n",
+ vid, NCSI_MAX_VLAN_VIDS);
+ return -ENOSPC;
}
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 265b9a8..927dad4 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -959,7 +959,7 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf },
{ NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf },
{ NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc },
- { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi },
+ { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi },
{ NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc },
{ NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp },
{ NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps },
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index d3891c9..f78ed24 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 28ec148..a445a6b 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the ipset modules
#
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 8ad2b52..5ca18f0 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -37,11 +37,11 @@
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct mtype *map = set->data;
- setup_timer(&map->gc, gc, (unsigned long)set);
+ timer_setup(&map->gc, gc, 0);
mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
@@ -272,10 +272,10 @@ out:
}
static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct mtype *map = set->data;
+ struct mtype *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
void *x;
u32 id;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4783eff..d8975a0 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -48,6 +48,7 @@ struct bitmap_ip {
size_t memsize; /* members size */
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->netmask = netmask;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_IPV4;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 9a065f6..4c279fb 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,6 +52,7 @@ struct bitmap_ipmac {
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* MAC + data extensions */
__aligned(__alignof__(u64));
};
@@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->elements = elements;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_IPV4;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f0c733..7f9bbd7 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,6 +40,7 @@ struct bitmap_port {
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->last_port = last_port;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_UNSPEC;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e495b5e..cf84f7b 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
from->family == to->family))
return -IPSET_ERR_TYPE_MISMATCH;
- if (from->ref_netlink || to->ref_netlink)
+ write_lock_bh(&ip_set_ref_lock);
+
+ if (from->ref_netlink || to->ref_netlink) {
+ write_unlock_bh(&ip_set_ref_lock);
return -EBUSY;
+ }
strncpy(from_name, from->name, IPSET_MAXNAMELEN);
strncpy(from->name, to->name, IPSET_MAXNAMELEN);
strncpy(to->name, from_name, IPSET_MAXNAMELEN);
- write_lock_bh(&ip_set_ref_lock);
swap(from->ref, to->ref);
ip_set(inst, from_id) = to;
ip_set(inst, to_id) = from;
@@ -2072,25 +2075,28 @@ static struct pernet_operations ip_set_net_ops = {
static int __init
ip_set_init(void)
{
- int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+ int ret = register_pernet_subsys(&ip_set_net_ops);
+
+ if (ret) {
+ pr_err("ip_set: cannot register pernet_subsys.\n");
+ return ret;
+ }
+ ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
if (ret != 0) {
pr_err("ip_set: cannot register with nfnetlink.\n");
+ unregister_pernet_subsys(&ip_set_net_ops);
return ret;
}
+
ret = nf_register_sockopt(&so_set);
if (ret != 0) {
pr_err("SO_SET registry failed: %d\n", ret);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+ unregister_pernet_subsys(&ip_set_net_ops);
return ret;
}
- ret = register_pernet_subsys(&ip_set_net_ops);
- if (ret) {
- pr_err("ip_set: cannot register pernet_subsys.\n");
- nf_unregister_sockopt(&so_set);
- nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
- return ret;
- }
+
pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL);
return 0;
}
@@ -2098,9 +2104,10 @@ ip_set_init(void)
static void __exit
ip_set_fini(void)
{
- unregister_pernet_subsys(&ip_set_net_ops);
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
+
+ unregister_pernet_subsys(&ip_set_net_ops);
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 51063d9..efffc8e 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -280,6 +280,7 @@ htable_bits(u32 hashsize)
struct htype {
struct htable __rcu *table; /* the hash table */
struct timer_list gc; /* garbage collection when timeout enabled */
+ struct ip_set *set; /* attached to this ip_set */
u32 maxelem; /* max elements in the hash */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set)
}
static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct htype *h = set->data;
- setup_timer(&h->gc, gc, (unsigned long)set);
+ timer_setup(&h->gc, gc, 0);
mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
pr_debug("gc initialized, run in every %u\n",
IPSET_GC_PERIOD(set->timeout));
@@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h)
}
static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct htype *h = set->data;
+ struct htype *h = from_timer(h, t, gc);
+ struct ip_set *set = h->set;
pr_debug("called\n");
spin_lock_bh(&set->lock);
@@ -1314,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
t->htable_bits = hbits;
RCU_INIT_POINTER(h->table, t);
+ h->set = set;
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 20bfbd3..613eb21 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
ip &= ip_set_hostmask(h->netmask);
+ e.ip = htonl(ip);
+ if (e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
- if (adt == IPSET_TEST) {
- e.ip = htonl(ip);
- if (e.ip == 0)
- return -IPSET_ERR_HASH_ELEM;
+ if (adt == IPSET_TEST)
return adtfn(set, &e, &ext, &ext, flags);
- }
ip_to = ip;
if (tb[IPSET_ATTR_IP_TO]) {
@@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- if (retried)
+ if (retried) {
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip += hosts) {
e.ip = htonl(ip);
- if (e.ip == 0)
- return -IPSET_ERR_HASH_ELEM;
+ }
+ for (; ip <= ip_to;) {
ret = adtfn(set, &e, &ext, &ext, flags);
-
if (ret && !ip_set_eexist(ret, flags))
return ret;
+ ip += hosts;
+ e.ip = htonl(ip);
+ if (e.ip == 0)
+ return 0;
+
ret = 0;
}
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index b64cf14..f3ba834 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
e.ip = htonl(ip);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index f438740..ddb8039 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 6215fb8..a7f4d7a 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
for (; p <= port_to; p++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 24bf558..0f164e9 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- for (; !before(ip_to, ip); ip++) {
+ for (; ip <= ip_to; ip++) {
e.ip = htonl(ip);
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
@@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip == ntohl(h->next.ip) &&
p == ntohs(h->next.port)
? ntohl(h->next.ip2) : ip2_from;
- while (!after(ip2, ip2_to)) {
+ while (ip2 <= ip2_to) {
e.ip2 = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
&cidr);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 5d9e8954..1c67a17 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
ip = ntohl(h->next.ip);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 44cf119..d417074 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index db614e1..7f9ae2e 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip[0]);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip[0] = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
ip2 = (retried &&
ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1])
: ip2_from;
- while (!after(ip2, ip2_to)) {
+ while (ip2 <= ip2_to) {
e.ip[1] = htonl(ip2);
last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 54b64b6..e6ef382 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index aff8469..8602f25 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (retried)
ip = ntohl(h->next.ip[0]);
- while (!after(ip, ip_to)) {
+ while (ip <= ip_to) {
e.ip[0] = htonl(ip);
ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port)
@@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
ip2 = (retried && ip == ntohl(h->next.ip[0]) &&
p == ntohs(h->next.port)) ? ntohl(h->next.ip[1])
: ip2_from;
- while (!after(ip2, ip2_to)) {
+ while (ip2 <= ip2_to) {
e.ip[1] = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
&e.cidr[1]);
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 2fff6b5..e864681 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -44,6 +44,7 @@ struct set_adt_elem {
struct list_set {
u32 size; /* size of set list array */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
struct net *net; /* namespace */
struct list_head members; /* the set members */
};
@@ -568,10 +569,10 @@ static const struct ip_set_type_variant set_variant = {
};
static void
-list_set_gc(unsigned long ul_set)
+list_set_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct list_set *map = set->data;
+ struct list_set *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
spin_lock_bh(&set->lock);
set_cleanup_entries(set);
@@ -582,11 +583,11 @@ list_set_gc(unsigned long ul_set)
}
static void
-list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct list_set *map = set->data;
- setup_timer(&map->gc, gc, (unsigned long)set);
+ timer_setup(&map->gc, gc, 0);
mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
@@ -603,6 +604,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
map->size = size;
map->net = net;
+ map->set = set;
INIT_LIST_HEAD(&map->members);
set->data = map;
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 67f3f43..c552993f 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the IPVS modules on top of IPv4.
#
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 57c8ee6..eff7569 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/sctp.h>
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 0e5b64a..13f7408 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IPVS An implementation of the IP virtual server support for the
* LINUX operating system. IPVS is now implemented as a module
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 90d3968..4527921 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -921,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
{
struct sk_buff *new_skb = NULL;
struct iphdr *old_iph = NULL;
+ __u8 old_dsfield;
#ifdef CONFIG_IP_VS_IPV6
struct ipv6hdr *old_ipv6h = NULL;
#endif
@@ -945,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
*payload_len =
ntohs(old_ipv6h->payload_len) +
sizeof(*old_ipv6h);
- *dsfield = ipv6_get_dsfield(old_ipv6h);
+ old_dsfield = ipv6_get_dsfield(old_ipv6h);
*ttl = old_ipv6h->hop_limit;
if (df)
*df = 0;
@@ -960,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
/* fix old IP header checksum */
ip_send_check(old_iph);
- *dsfield = ipv4_get_dsfield(old_iph);
+ old_dsfield = ipv4_get_dsfield(old_iph);
*ttl = old_iph->ttl;
if (payload_len)
*payload_len = ntohs(old_iph->tot_len);
}
+ /* Implement full-functionality option for ECN encapsulation */
+ *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield);
+
return skb;
error:
kfree_skb(skb);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 49f87ec..44284cd 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NF_INTERNALS_H
#define _NF_INTERNALS_H
diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c
index d2a9e6b..46cb378 100644
--- a/net/netfilter/nf_sockopt.c
+++ b/net/netfilter/nf_sockopt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1d66be0..d8327b4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1048,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
goto nla_put_failure;
- if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
+ if (basechain->stats && nft_dump_stats(skb, basechain->stats))
goto nla_put_failure;
}
@@ -1487,8 +1487,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME],
genmask);
- if (IS_ERR(chain2))
- return PTR_ERR(chain2);
+ if (!IS_ERR(chain2))
+ return -EEXIST;
}
if (nla[NFTA_CHAIN_COUNTERS]) {
@@ -2736,8 +2736,10 @@ cont:
list_for_each_entry(i, &ctx->table->sets, list) {
if (!nft_is_active_next(ctx->net, i))
continue;
- if (!strcmp(set->name, i->name))
+ if (!strcmp(set->name, i->name)) {
+ kfree(set->name);
return -ENFILE;
+ }
}
return 0;
}
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index c68a7e0..f8166c1 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -682,7 +682,6 @@ nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc,
{
if (desc->size) {
switch (desc->klen) {
- case 2:
case 4:
return &nft_hash_fast_ops;
default:
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index a164e51..a77dd51 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
return ERR_PTR(-EFAULT);
- strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+ memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1);
info->num_counters = compat_tmp.num_counters;
user += sizeof(compat_tmp);
} else
@@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
if (copy_from_user(info, user, sizeof(*info)) != 0)
return ERR_PTR(-EFAULT);
- info->name[sizeof(info->name) - 1] = '\0';
user += sizeof(*info);
}
+ info->name[sizeof(info->name) - 1] = '\0';
size = sizeof(struct xt_counters);
size *= info->num_counters;
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 38986a9..041da0d 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -8,6 +8,7 @@
*/
#include <linux/module.h>
+#include <linux/syscalls.h>
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>
@@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
return 0;
}
+static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
+{
+ mm_segment_t oldfs = get_fs();
+ int retval, fd;
+
+ set_fs(KERNEL_DS);
+ fd = bpf_obj_get_user(path, 0);
+ set_fs(oldfs);
+ if (fd < 0)
+ return fd;
+
+ retval = __bpf_mt_check_fd(fd, ret);
+ sys_close(fd);
+ return retval;
+}
+
static int bpf_mt_check(const struct xt_mtchk_param *par)
{
struct xt_bpf_info *info = par->matchinfo;
@@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par)
return __bpf_mt_check_bytecode(info->bpf_program,
info->bpf_program_num_elem,
&info->filter);
- else if (info->mode == XT_BPF_MODE_FD_PINNED ||
- info->mode == XT_BPF_MODE_FD_ELF)
+ else if (info->mode == XT_BPF_MODE_FD_ELF)
return __bpf_mt_check_fd(info->fd, &info->filter);
+ else if (info->mode == XT_BPF_MODE_PATH_PINNED)
+ return __bpf_mt_check_path(info->path, &info->filter);
else
return -EINVAL;
}
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
index 8fd3241..68ccbe5 100644
--- a/net/netfilter/xt_repldata.h
+++ b/net/netfilter/xt_repldata.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Today's hack: quantum tunneling in structs
*
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index e75ef39..575d215 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
- transparent)
+ transparent && sk_fullsock(sk))
pskb->mark = sk->sk_mark;
if (sk != skb->sk)
@@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
transparent = nf_sk_is_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
- transparent)
+ transparent && sk_fullsock(sk))
pskb->mark = sk->sk_mark;
if (sk != skb->sk)
diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile
index d341ede..5a46381 100644
--- a/net/netlabel/Makefile
+++ b/net/netlabel/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the NetLabel subsystem.
#
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3278077..26ded42 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2266,14 +2266,18 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
+ if (cb->start) {
+ ret = cb->start(cb);
+ if (ret)
+ goto error_unlock;
+ }
+
nlk->cb_running = true;
mutex_unlock(nlk->cb_mutex);
- if (cb->start)
- cb->start(cb);
-
ret = netlink_dump(sk);
+
sock_put(sk);
if (ret)
@@ -2303,27 +2307,26 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
size_t tlvlen = 0;
struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
unsigned int flags = 0;
+ bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK;
/* Error messages get the original request appened, unless the user
* requests to cap the error message, and get extra error data if
* requested.
*/
+ if (nlk_has_extack && extack && extack->_msg)
+ tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+
if (err) {
if (!(nlk->flags & NETLINK_F_CAP_ACK))
payload += nlmsg_len(nlh);
else
flags |= NLM_F_CAPPED;
- if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
- if (extack->_msg)
- tlvlen += nla_total_size(strlen(extack->_msg) + 1);
- if (extack->bad_attr)
- tlvlen += nla_total_size(sizeof(u32));
- }
+ if (nlk_has_extack && extack && extack->bad_attr)
+ tlvlen += nla_total_size(sizeof(u32));
} else {
flags |= NLM_F_CAPPED;
- if (nlk->flags & NETLINK_F_EXT_ACK &&
- extack && extack->cookie_len)
+ if (nlk_has_extack && extack && extack->cookie_len)
tlvlen += nla_total_size(extack->cookie_len);
}
@@ -2332,16 +2335,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
if (!skb) {
- struct sock *sk;
-
- sk = netlink_lookup(sock_net(in_skb->sk),
- in_skb->sk->sk_protocol,
- NETLINK_CB(in_skb).portid);
- if (sk) {
- sk->sk_err = ENOBUFS;
- sk->sk_error_report(sk);
- sock_put(sk);
- }
+ NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
+ NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
return;
}
@@ -2351,11 +2346,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
errmsg->error = err;
memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
- if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
+ if (nlk_has_extack && extack) {
+ if (extack->_msg) {
+ WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
+ extack->_msg));
+ }
if (err) {
- if (extack->_msg)
- WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
- extack->_msg));
if (extack->bad_attr &&
!WARN_ON((u8 *)extack->bad_attr < in_skb->data ||
(u8 *)extack->bad_attr >= in_skb->data +
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 3490f24..0281885 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _AF_NETLINK_H
#define _AF_NETLINK_H
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 10f8b4c..d444daf 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NETLINK Generic Netlink Family
*
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ebf16f7..2dec358 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *);
/*
* Handler for deferred kills.
*/
-static void nr_destroy_timer(unsigned long data)
+static void nr_destroy_timer(struct timer_list *t)
{
- struct sock *sk=(struct sock *)data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
bh_lock_sock(sk);
sock_hold(sk);
nr_destroy_socket(sk);
@@ -284,7 +284,7 @@ void nr_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
- sk->sk_timer.function = nr_destroy_timer;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_destroy_timer;
sk->sk_timer.expires = jiffies + 2 * HZ;
add_timer(&sk->sk_timer);
} else
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 80dbd0b..fbfdae4 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
case NR_DISCREQ:
nr_write_internal(sk, NR_DISCACK);
-
+ /* fall through */
case NR_DISCACK:
nr_disconnect(sk, 0);
break;
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 0c59354..75e6ba9 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign,
static void nr_remove_neigh(struct nr_neigh *);
+/* re-sort the routes in quality order. */
+static void re_sort_routes(struct nr_node *nr_node, int x, int y)
+{
+ if (nr_node->routes[y].quality > nr_node->routes[x].quality) {
+ if (nr_node->which == x)
+ nr_node->which = y;
+ else if (nr_node->which == y)
+ nr_node->which = x;
+
+ swap(nr_node->routes[x], nr_node->routes[y]);
+ }
+}
+
/*
* Add a new route to a node, and in the process add the node and the
* neighbour if it is new.
@@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
{
struct nr_node *nr_node;
struct nr_neigh *nr_neigh;
- struct nr_route nr_route;
int i, found;
struct net_device *odev;
@@ -251,49 +263,11 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
/* Now re-sort the routes in quality order */
switch (nr_node->count) {
case 3:
- if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
- switch (nr_node->which) {
- case 0:
- nr_node->which = 1;
- break;
- case 1:
- nr_node->which = 0;
- break;
- }
- nr_route = nr_node->routes[0];
- nr_node->routes[0] = nr_node->routes[1];
- nr_node->routes[1] = nr_route;
- }
- if (nr_node->routes[2].quality > nr_node->routes[1].quality) {
- switch (nr_node->which) {
- case 1: nr_node->which = 2;
- break;
-
- case 2: nr_node->which = 1;
- break;
-
- default:
- break;
- }
- nr_route = nr_node->routes[1];
- nr_node->routes[1] = nr_node->routes[2];
- nr_node->routes[2] = nr_route;
- }
+ re_sort_routes(nr_node, 0, 1);
+ re_sort_routes(nr_node, 1, 2);
+ /* fall through */
case 2:
- if (nr_node->routes[1].quality > nr_node->routes[0].quality) {
- switch (nr_node->which) {
- case 0: nr_node->which = 1;
- break;
-
- case 1: nr_node->which = 0;
- break;
-
- default: break;
- }
- nr_route = nr_node->routes[0];
- nr_node->routes[0] = nr_node->routes[1];
- nr_node->routes[1] = nr_route;
- }
+ re_sort_routes(nr_node, 0, 1);
case 1:
break;
}
@@ -384,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
switch (i) {
case 0:
nr_node->routes[0] = nr_node->routes[1];
+ /* fall through */
case 1:
nr_node->routes[1] = nr_node->routes[2];
case 2:
@@ -553,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev)
switch (i) {
case 0:
t->routes[0] = t->routes[1];
+ /* fall through */
case 1:
t->routes[1] = t->routes[2];
case 2:
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 94d05806..43569ae 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -29,24 +29,23 @@
#include <linux/interrupt.h>
#include <net/netrom.h>
-static void nr_heartbeat_expiry(unsigned long);
-static void nr_t1timer_expiry(unsigned long);
-static void nr_t2timer_expiry(unsigned long);
-static void nr_t4timer_expiry(unsigned long);
-static void nr_idletimer_expiry(unsigned long);
+static void nr_heartbeat_expiry(struct timer_list *);
+static void nr_t1timer_expiry(struct timer_list *);
+static void nr_t2timer_expiry(struct timer_list *);
+static void nr_t4timer_expiry(struct timer_list *);
+static void nr_idletimer_expiry(struct timer_list *);
void nr_init_timers(struct sock *sk)
{
struct nr_sock *nr = nr_sk(sk);
- setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk);
- setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk);
- setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk);
- setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
+ timer_setup(&nr->t1timer, nr_t1timer_expiry, 0);
+ timer_setup(&nr->t2timer, nr_t2timer_expiry, 0);
+ timer_setup(&nr->t4timer, nr_t4timer_expiry, 0);
+ timer_setup(&nr->idletimer, nr_idletimer_expiry, 0);
/* initialized by sock_init_data */
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &nr_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_heartbeat_expiry;
}
void nr_start_t1timer(struct sock *sk)
@@ -113,9 +112,9 @@ int nr_t1timer_running(struct sock *sk)
return timer_pending(&nr_sk(sk)->t1timer);
}
-static void nr_heartbeat_expiry(unsigned long param)
+static void nr_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct nr_sock *nr = nr_sk(sk);
bh_lock_sock(sk);
@@ -152,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t2timer_expiry(unsigned long param)
+static void nr_t2timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, t2timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
if (nr->condition & NR_COND_ACK_PENDING) {
@@ -165,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t4timer_expiry(unsigned long param)
+static void nr_t4timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct nr_sock *nr = from_timer(nr, t, t4timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
bh_unlock_sock(sk);
}
-static void nr_idletimer_expiry(unsigned long param)
+static void nr_idletimer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, idletimer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
@@ -202,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t1timer_expiry(unsigned long param)
+static void nr_t1timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, t1timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
switch (nr->state) {
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index 2555ff8..2ffc69b 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux NFC subsystem.
#
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index 0ca31d9..c3362c4 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux NFC NCI layer.
#
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 60f8090..299f447 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Open vSwitch.
#
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a54a556..a551232 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1203,6 +1203,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return err == -EINPROGRESS ? 0 : err;
break;
+ case OVS_ACTION_ATTR_CT_CLEAR:
+ err = ovs_ct_clear(skb, key);
+ break;
+
case OVS_ACTION_ATTR_PUSH_ETH:
err = push_eth(skb, key, nla_data(a));
break;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index d558e88..b27c5c6 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
}
}
/* Non-ICMP, fall thru to initialize if needed. */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
@@ -1129,6 +1130,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
return err;
}
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ if (skb_nfct(skb)) {
+ nf_conntrack_put(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ ovs_ct_fill_key(skb, key);
+ }
+
+ return 0;
+}
+
static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
const struct sw_flow_key *key, bool log)
{
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index bc7efd1..399dfdd 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
const struct ovs_conntrack_info *);
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *swkey,
@@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
return -ENOTSUPP;
}
+static inline int ovs_ct_clear(struct sk_buff *skb,
+ struct sw_flow_key *key)
+{
+ return -ENOTSUPP;
+}
+
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
struct sw_flow_key *key)
{
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index c3aec62..4d38ac0 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1848,7 +1848,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
- u32 portid, u32 seq, u32 flags, u8 cmd)
+ struct net *net, u32 portid, u32 seq,
+ u32 flags, u8 cmd)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
@@ -1864,9 +1865,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
nla_put_string(skb, OVS_VPORT_ATTR_NAME,
- ovs_vport_name(vport)))
+ ovs_vport_name(vport)) ||
+ nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
goto nla_put_failure;
+ if (!net_eq(net, dev_net(vport->dev))) {
+ int id = peernet2id_alloc(net, dev_net(vport->dev));
+
+ if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
+ goto nla_put_failure;
+ }
+
ovs_vport_get_stats(vport, &vport_stats);
if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
sizeof(struct ovs_vport_stats), &vport_stats,
@@ -1896,8 +1905,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void)
}
/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
- u32 seq, u8 cmd)
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+ u32 portid, u32 seq, u8 cmd)
{
struct sk_buff *skb;
int retval;
@@ -1906,7 +1915,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
BUG_ON(retval < 0);
return skb;
@@ -1920,6 +1929,8 @@ static struct vport *lookup_vport(struct net *net,
struct datapath *dp;
struct vport *vport;
+ if (a[OVS_VPORT_ATTR_IFINDEX])
+ return ERR_PTR(-EOPNOTSUPP);
if (a[OVS_VPORT_ATTR_NAME]) {
vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
if (!vport)
@@ -1944,6 +1955,7 @@ static struct vport *lookup_vport(struct net *net,
return vport;
} else
return ERR_PTR(-EINVAL);
+
}
/* Called with ovs_mutex */
@@ -1983,6 +1995,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
!a[OVS_VPORT_ATTR_UPCALL_PID])
return -EINVAL;
+ if (a[OVS_VPORT_ATTR_IFINDEX])
+ return -EOPNOTSUPP;
port_no = a[OVS_VPORT_ATTR_PORT_NO]
? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
@@ -2032,8 +2046,9 @@ restart:
goto exit_unlock_free;
}
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_NEW);
if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
update_headroom(dp);
@@ -2090,8 +2105,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
}
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_NEW);
BUG_ON(err < 0);
ovs_unlock();
@@ -2128,8 +2144,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
}
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_DEL);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_DEL);
BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */
@@ -2169,8 +2186,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock_free;
- err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
- info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+ err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
+ info->snd_portid, info->snd_seq, 0,
+ OVS_VPORT_CMD_NEW);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2202,6 +2220,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
if (j >= skip &&
ovs_vport_cmd_fill_info(vport, skb,
+ sock_net(skb->sk),
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
@@ -2228,6 +2247,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+ [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
};
static const struct genl_ops dp_vport_genl_ops[] = {
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4806006..4a104ef 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -200,8 +200,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
uint32_t cutlen);
const char *ovs_dp_name(const struct datapath *dp);
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
- u8 cmd);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
+ u32 portid, u32 seq, u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_actions *, struct sw_flow_key *);
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 653d073..f3ee2f2 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport)
struct datapath *dp;
dp = vport->dp;
- notify = ovs_vport_cmd_build_info(vport, 0, 0,
- OVS_VPORT_CMD_DEL);
+ notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp),
+ 0, 0, OVS_VPORT_CMD_DEL);
ovs_dp_detach_port(vport);
if (IS_ERR(notify)) {
genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0,
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index e8eb427..dc0d790 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>
+#include <net/erspan.h>
#include "flow_netlink.h"
@@ -75,6 +76,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
break;
case OVS_ACTION_ATTR_CT:
+ case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_POP_ETH:
case OVS_ACTION_ATTR_POP_MPLS:
@@ -319,7 +321,8 @@ size_t ovs_tun_key_attr_size(void)
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
- + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+ + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
}
size_t ovs_key_attr_size(void)
@@ -371,6 +374,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -593,6 +597,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
+static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+ struct erspan_metadata opts;
+
+ BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+ memset(&opts, 0, sizeof(opts));
+ opts.index = nla_get_be32(attr);
+
+ /* Index has only 20-bit */
+ if (ntohl(opts.index) & ~INDEX_MASK) {
+ OVS_NLERR(log, "ERSPAN index number %x too large.",
+ ntohl(opts.index));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
+ opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+ is_mask);
+
+ return 0;
+}
+
static int ip_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -700,6 +731,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
+ err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
+ if (err)
+ return err;
+
+ tun_flags |= TUNNEL_ERSPAN_OPT;
+ opts_type = type;
+ break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
@@ -824,6 +868,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
+ else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ ((struct erspan_metadata *)tun_opts)->index))
+ return -EMSGSIZE;
}
return 0;
@@ -2195,6 +2243,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ break;
}
};
@@ -2479,6 +2529,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CT] = (u32)-1,
+ [OVS_ACTION_ATTR_CT_CLEAR] = 0,
[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
[OVS_ACTION_ATTR_POP_ETH] = 0,
@@ -2620,6 +2671,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_CT_CLEAR:
+ break;
+
case OVS_ACTION_ATTR_PUSH_ETH:
/* Disallow pushing an Ethernet header if one
* is already present */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 0389398..2e5e7a4 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
rtnl_lock();
err = netdev_master_upper_dev_link(vport->dev,
- get_dpdev(vport->dp), NULL, NULL);
+ get_dpdev(vport->dp),
+ NULL, NULL, NULL);
if (err)
goto error_unlock;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b0f2218..9603f6f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -201,11 +201,8 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *,
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
struct tpacket_block_desc *);
-static void prb_retire_rx_blk_timer_expired(unsigned long);
+static void prb_retire_rx_blk_timer_expired(struct timer_list *);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
-static void prb_init_blk_timer(struct packet_sock *,
- struct tpacket_kbdq_core *,
- void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
struct tpacket3_hdr *);
@@ -540,20 +537,14 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
prb_del_retire_blk_timer(pkc);
}
-static void prb_init_blk_timer(struct packet_sock *po,
- struct tpacket_kbdq_core *pkc,
- void (*func) (unsigned long))
-{
- setup_timer(&pkc->retire_blk_timer, func, (long)po);
- pkc->retire_blk_timer.expires = jiffies;
-}
-
static void prb_setup_retire_blk_timer(struct packet_sock *po)
{
struct tpacket_kbdq_core *pkc;
pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
- prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
+ timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
+ 0);
+ pkc->retire_blk_timer.expires = jiffies;
}
static int prb_calc_retire_blk_tmo(struct packet_sock *po,
@@ -671,9 +662,10 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
* prb_calc_retire_blk_tmo() calculates the tmo.
*
*/
-static void prb_retire_rx_blk_timer_expired(unsigned long data)
+static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
{
- struct packet_sock *po = (struct packet_sock *)data;
+ struct packet_sock *po =
+ from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
unsigned int frozen;
struct tpacket_block_desc *pbd;
@@ -1767,7 +1759,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
out:
if (err && rollover) {
- kfree(rollover);
+ kfree_rcu(rollover, rcu);
po->rollover = NULL;
}
mutex_unlock(&fanout_mutex);
@@ -1794,8 +1786,10 @@ static struct packet_fanout *fanout_release(struct sock *sk)
else
f = NULL;
- if (po->rollover)
+ if (po->rollover) {
kfree_rcu(po->rollover, rcu);
+ po->rollover = NULL;
+ }
}
mutex_unlock(&fanout_mutex);
@@ -2838,6 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
struct packet_sock *po = pkt_sk(sk);
+ bool has_vnet_hdr = false;
int hlen, tlen, linear;
int extra_len = 0;
@@ -2881,6 +2876,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
if (err)
goto out_unlock;
+ has_vnet_hdr = true;
}
if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2939,7 +2935,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->priority = sk->sk_priority;
skb->mark = sockc.mark;
- if (po->has_vnet_hdr) {
+ if (has_vnet_hdr) {
err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
if (err)
goto out_free;
@@ -3067,13 +3063,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
int ret = 0;
bool unlisted = false;
- if (po->fanout)
- return -EINVAL;
-
lock_sock(sk);
spin_lock(&po->bind_lock);
rcu_read_lock();
+ if (po->fanout) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
if (name) {
dev = dev_get_by_name_rcu(sock_net(sk), name);
if (!dev) {
@@ -3845,6 +3843,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
+ struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3923,13 +3922,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- if (!po->rollover)
+ rcu_read_lock();
+ rollover = rcu_dereference(po->rollover);
+ if (rollover) {
+ rstats.tp_all = atomic_long_read(&rollover->num);
+ rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
+ }
+ rcu_read_unlock();
+ if (!rollover)
return -EINVAL;
- rstats.tp_all = atomic_long_read(&po->rollover->num);
- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 94d1d40..562fbc1 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PACKET_INTERNAL_H__
#define __PACKET_INTERNAL_H__
diff --git a/net/phonet/Makefile b/net/phonet/Makefile
index e10b1b1..444f875 100644
--- a/net/phonet/Makefile
+++ b/net/phonet/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_PHONET) += phonet.o pn_pep.o
phonet-y := \
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f925753..3b0ef69 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -35,11 +35,11 @@
#include <net/phonet/pn_dev.h>
/* Transport protocol registration */
-static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
+static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
-static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
+static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
{
- struct phonet_protocol *pp;
+ const struct phonet_protocol *pp;
if (protocol >= PHONET_NPROTO)
return NULL;
@@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
return pp;
}
-static inline void phonet_proto_put(struct phonet_protocol *pp)
+static inline void phonet_proto_put(const struct phonet_protocol *pp)
{
module_put(pp->prot->owner);
}
@@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
struct pn_sock *pn;
- struct phonet_protocol *pnp;
+ const struct phonet_protocol *pnp;
int err;
if (!capable(CAP_SYS_ADMIN))
@@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
return 1;
}
-struct header_ops phonet_header_ops = {
+const struct header_ops phonet_header_ops = {
.create = pn_header_create,
.parse = pn_header_parse,
};
@@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
static DEFINE_MUTEX(proto_tab_lock);
int __init_or_module phonet_proto_register(unsigned int protocol,
- struct phonet_protocol *pp)
+ const struct phonet_protocol *pp)
{
int err = 0;
@@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
}
EXPORT_SYMBOL(phonet_proto_register);
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
+void phonet_proto_unregister(unsigned int protocol,
+ const struct phonet_protocol *pp)
{
mutex_lock(&proto_tab_lock);
BUG_ON(proto_tab[protocol] != pp);
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 5e71043..b44fb90 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -195,7 +195,7 @@ static struct proto pn_proto = {
.name = "PHONET",
};
-static struct phonet_protocol pn_dgram_proto = {
+static const struct phonet_protocol pn_dgram_proto = {
.ops = &phonet_dgram_ops,
.prot = &pn_proto,
.sock_type = SOCK_DGRAM,
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e815379..9fc76b1 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1351,7 +1351,7 @@ static struct proto pep_proto = {
.name = "PNPIPE",
};
-static struct phonet_protocol pep_pn_proto = {
+static const struct phonet_protocol pep_pn_proto = {
.ops = &phonet_stream_ops,
.prot = &pep_proto,
.sock_type = SOCK_SEQPACKET,
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 3a6ad0f..64f9562 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -296,6 +296,6 @@ static void __exit psample_module_exit(void)
module_init(psample_module_init);
module_exit(psample_module_exit);
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
MODULE_DESCRIPTION("netlink channel for packet sampling");
MODULE_LICENSE("GPL v2");
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c2f5c13..e458ece 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,26 +20,15 @@
#include "qrtr.h"
-#define QRTR_PROTO_VER 1
+#define QRTR_PROTO_VER_1 1
+#define QRTR_PROTO_VER_2 3
/* auto-bind range */
#define QRTR_MIN_EPH_SOCKET 0x4000
#define QRTR_MAX_EPH_SOCKET 0x7fff
-enum qrtr_pkt_type {
- QRTR_TYPE_DATA = 1,
- QRTR_TYPE_HELLO = 2,
- QRTR_TYPE_BYE = 3,
- QRTR_TYPE_NEW_SERVER = 4,
- QRTR_TYPE_DEL_SERVER = 5,
- QRTR_TYPE_DEL_CLIENT = 6,
- QRTR_TYPE_RESUME_TX = 7,
- QRTR_TYPE_EXIT = 8,
- QRTR_TYPE_PING = 9,
-};
-
/**
- * struct qrtr_hdr - (I|R)PCrouter packet header
+ * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
* @version: protocol version
* @type: packet type; one of QRTR_TYPE_*
* @src_node_id: source node
@@ -49,7 +38,7 @@ enum qrtr_pkt_type {
* @dst_node_id: destination node
* @dst_port_id: destination port
*/
-struct qrtr_hdr {
+struct qrtr_hdr_v1 {
__le32 version;
__le32 type;
__le32 src_node_id;
@@ -60,9 +49,44 @@ struct qrtr_hdr {
__le32 dst_port_id;
} __packed;
-#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
-#define QRTR_NODE_BCAST ((unsigned int)-1)
-#define QRTR_PORT_CTRL ((unsigned int)-2)
+/**
+ * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
+ * @version: protocol version
+ * @type: packet type; one of QRTR_TYPE_*
+ * @flags: bitmask of QRTR_FLAGS_*
+ * @optlen: length of optional header data
+ * @size: length of packet, excluding this header and optlen
+ * @src_node_id: source node
+ * @src_port_id: source port
+ * @dst_node_id: destination node
+ * @dst_port_id: destination port
+ */
+struct qrtr_hdr_v2 {
+ u8 version;
+ u8 type;
+ u8 flags;
+ u8 optlen;
+ __le32 size;
+ __le16 src_node_id;
+ __le16 src_port_id;
+ __le16 dst_node_id;
+ __le16 dst_port_id;
+};
+
+#define QRTR_FLAGS_CONFIRM_RX BIT(0)
+
+struct qrtr_cb {
+ u32 src_node;
+ u32 src_port;
+ u32 dst_node;
+ u32 dst_port;
+
+ u8 type;
+ u8 confirm_rx;
+};
+
+#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
+ sizeof(struct qrtr_hdr_v2))
struct qrtr_sock {
/* WARNING: sk must be the first member */
@@ -111,8 +135,12 @@ struct qrtr_node {
struct list_head item;
};
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb);
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb);
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to);
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to);
/* Release node resources and free the node.
*
@@ -150,10 +178,27 @@ static void qrtr_node_release(struct qrtr_node *node)
}
/* Pass an outgoing packet socket buffer to the endpoint driver. */
-static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
+ struct qrtr_hdr_v1 *hdr;
+ size_t len = skb->len;
int rc = -ENODEV;
+ hdr = skb_push(skb, sizeof(*hdr));
+ hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
+ hdr->type = cpu_to_le32(type);
+ hdr->src_node_id = cpu_to_le32(from->sq_node);
+ hdr->src_port_id = cpu_to_le32(from->sq_port);
+ hdr->dst_node_id = cpu_to_le32(to->sq_node);
+ hdr->dst_port_id = cpu_to_le32(to->sq_port);
+
+ hdr->size = cpu_to_le32(len);
+ hdr->confirm_rx = 0;
+
+ skb_put_padto(skb, ALIGN(len, 4));
+
mutex_lock(&node->ep_lock);
if (node->ep)
rc = node->ep->xmit(node->ep, skb);
@@ -207,125 +252,103 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
{
struct qrtr_node *node = ep->node;
- const struct qrtr_hdr *phdr = data;
+ const struct qrtr_hdr_v1 *v1;
+ const struct qrtr_hdr_v2 *v2;
struct sk_buff *skb;
- unsigned int psize;
+ struct qrtr_cb *cb;
unsigned int size;
- unsigned int type;
unsigned int ver;
- unsigned int dst;
-
- if (len < QRTR_HDR_SIZE || len & 3)
- return -EINVAL;
-
- ver = le32_to_cpu(phdr->version);
- size = le32_to_cpu(phdr->size);
- type = le32_to_cpu(phdr->type);
- dst = le32_to_cpu(phdr->dst_port_id);
-
- psize = (size + 3) & ~3;
+ size_t hdrlen;
- if (ver != QRTR_PROTO_VER)
- return -EINVAL;
-
- if (len != psize + QRTR_HDR_SIZE)
- return -EINVAL;
-
- if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
+ if (len & 3)
return -EINVAL;
skb = netdev_alloc_skb(NULL, len);
if (!skb)
return -ENOMEM;
- skb_reset_transport_header(skb);
- skb_put_data(skb, data, len);
-
- skb_queue_tail(&node->rx_queue, skb);
- schedule_work(&node->work);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
+ cb = (struct qrtr_cb *)skb->cb;
-static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
- u32 src_node, u32 dst_node)
-{
- struct qrtr_hdr *hdr;
- struct sk_buff *skb;
-
- skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
- if (!skb)
- return NULL;
- skb_reset_transport_header(skb);
+ /* Version field in v1 is little endian, so this works for both cases */
+ ver = *(u8*)data;
- hdr = skb_put(skb, QRTR_HDR_SIZE);
- hdr->version = cpu_to_le32(QRTR_PROTO_VER);
- hdr->type = cpu_to_le32(type);
- hdr->src_node_id = cpu_to_le32(src_node);
- hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
- hdr->confirm_rx = cpu_to_le32(0);
- hdr->size = cpu_to_le32(pkt_len);
- hdr->dst_node_id = cpu_to_le32(dst_node);
- hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+ switch (ver) {
+ case QRTR_PROTO_VER_1:
+ v1 = data;
+ hdrlen = sizeof(*v1);
- return skb;
-}
+ cb->type = le32_to_cpu(v1->type);
+ cb->src_node = le32_to_cpu(v1->src_node_id);
+ cb->src_port = le32_to_cpu(v1->src_port_id);
+ cb->confirm_rx = !!v1->confirm_rx;
+ cb->dst_node = le32_to_cpu(v1->dst_node_id);
+ cb->dst_port = le32_to_cpu(v1->dst_port_id);
-/* Allocate and construct a resume-tx packet. */
-static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
- u32 dst_node, u32 port)
-{
- const int pkt_len = 20;
- struct sk_buff *skb;
- __le32 *buf;
+ size = le32_to_cpu(v1->size);
+ break;
+ case QRTR_PROTO_VER_2:
+ v2 = data;
+ hdrlen = sizeof(*v2) + v2->optlen;
+
+ cb->type = v2->type;
+ cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
+ cb->src_node = le16_to_cpu(v2->src_node_id);
+ cb->src_port = le16_to_cpu(v2->src_port_id);
+ cb->dst_node = le16_to_cpu(v2->dst_node_id);
+ cb->dst_port = le16_to_cpu(v2->dst_port_id);
+
+ if (cb->src_port == (u16)QRTR_PORT_CTRL)
+ cb->src_port = QRTR_PORT_CTRL;
+ if (cb->dst_port == (u16)QRTR_PORT_CTRL)
+ cb->dst_port = QRTR_PORT_CTRL;
+
+ size = le32_to_cpu(v2->size);
+ break;
+ default:
+ pr_err("qrtr: Invalid version %d\n", ver);
+ goto err;
+ }
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len,
- src_node, dst_node);
- if (!skb)
- return NULL;
+ if (len != ALIGN(size, 4) + hdrlen)
+ goto err;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
- buf[1] = cpu_to_le32(src_node);
- buf[2] = cpu_to_le32(port);
+ if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
+ goto err;
- return skb;
-}
+ skb_put_data(skb, data + hdrlen, size);
-/* Allocate and construct a BYE message to signal remote termination */
-static struct sk_buff *qrtr_alloc_local_bye(u32 src_node)
-{
- const int pkt_len = 20;
- struct sk_buff *skb;
- __le32 *buf;
+ skb_queue_tail(&node->rx_queue, skb);
+ schedule_work(&node->work);
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len,
- src_node, qrtr_local_nid);
- if (!skb)
- return NULL;
+ return 0;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_BYE);
+err:
+ kfree_skb(skb);
+ return -EINVAL;
- return skb;
}
+EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
-static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq)
+/**
+ * qrtr_alloc_ctrl_packet() - allocate control packet skb
+ * @pkt: reference to qrtr_ctrl_pkt pointer
+ *
+ * Returns newly allocated sk_buff, or NULL on failure
+ *
+ * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
+ * on success returns a reference to the control packet in @pkt.
+ */
+static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
{
- const int pkt_len = 20;
+ const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
struct sk_buff *skb;
- __le32 *buf;
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len,
- sq->sq_node, QRTR_NODE_BCAST);
+ skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL);
if (!skb)
return NULL;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
- buf[1] = cpu_to_le32(sq->sq_node);
- buf[2] = cpu_to_le32(sq->sq_port);
+ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
+ *pkt = skb_put_zero(skb, pkt_len);
return skb;
}
@@ -340,24 +363,26 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
static void qrtr_node_rx_work(struct work_struct *work)
{
struct qrtr_node *node = container_of(work, struct qrtr_node, work);
+ struct qrtr_ctrl_pkt *pkt;
+ struct sockaddr_qrtr dst;
+ struct sockaddr_qrtr src;
struct sk_buff *skb;
while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
- const struct qrtr_hdr *phdr;
- u32 dst_node, dst_port;
struct qrtr_sock *ipc;
- u32 src_node;
+ struct qrtr_cb *cb;
int confirm;
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
- src_node = le32_to_cpu(phdr->src_node_id);
- dst_node = le32_to_cpu(phdr->dst_node_id);
- dst_port = le32_to_cpu(phdr->dst_port_id);
- confirm = !!phdr->confirm_rx;
+ cb = (struct qrtr_cb *)skb->cb;
+ src.sq_node = cb->src_node;
+ src.sq_port = cb->src_port;
+ dst.sq_node = cb->dst_node;
+ dst.sq_port = cb->dst_port;
+ confirm = !!cb->confirm_rx;
- qrtr_node_assign(node, src_node);
+ qrtr_node_assign(node, cb->src_node);
- ipc = qrtr_port_lookup(dst_port);
+ ipc = qrtr_port_lookup(cb->dst_port);
if (!ipc) {
kfree_skb(skb);
} else {
@@ -368,10 +393,16 @@ static void qrtr_node_rx_work(struct work_struct *work)
}
if (confirm) {
- skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
+ skb = qrtr_alloc_ctrl_packet(&pkt);
if (!skb)
break;
- if (qrtr_node_enqueue(node, skb))
+
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+ pkt->client.node = cpu_to_le32(dst.sq_node);
+ pkt->client.port = cpu_to_le32(dst.sq_port);
+
+ if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
+ &dst, &src))
break;
}
}
@@ -421,6 +452,9 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
{
struct qrtr_node *node = ep->node;
+ struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
+ struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
+ struct qrtr_ctrl_pkt *pkt;
struct sk_buff *skb;
mutex_lock(&node->ep_lock);
@@ -428,9 +462,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
mutex_unlock(&node->ep_lock);
/* Notify the local controller about the event */
- skb = qrtr_alloc_local_bye(node->nid);
- if (skb)
- qrtr_local_enqueue(NULL, skb);
+ skb = qrtr_alloc_ctrl_packet(&pkt);
+ if (skb) {
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
+ qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
+ }
qrtr_node_release(node);
ep->node = NULL;
@@ -466,13 +502,24 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
/* Remove port assignment. */
static void qrtr_port_remove(struct qrtr_sock *ipc)
{
+ struct qrtr_ctrl_pkt *pkt;
struct sk_buff *skb;
int port = ipc->us.sq_port;
+ struct sockaddr_qrtr to;
- skb = qrtr_alloc_del_client(&ipc->us);
+ to.sq_family = AF_QIPCRTR;
+ to.sq_node = QRTR_NODE_BCAST;
+ to.sq_port = QRTR_PORT_CTRL;
+
+ skb = qrtr_alloc_ctrl_packet(&pkt);
if (skb) {
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
+ pkt->client.node = cpu_to_le32(ipc->us.sq_node);
+ pkt->client.port = cpu_to_le32(ipc->us.sq_port);
+
skb_set_owner_w(skb, &ipc->sk);
- qrtr_bcast_enqueue(NULL, skb);
+ qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
+ &to);
}
if (port == QRTR_PORT_CTRL)
@@ -541,7 +588,7 @@ static void qrtr_reset_ports(void)
sock_hold(&ipc->sk);
ipc->sk.sk_err = ENETRESET;
- wake_up_interruptible(sk_sleep(&ipc->sk));
+ ipc->sk.sk_error_report(&ipc->sk);
sock_put(&ipc->sk);
}
mutex_unlock(&qrtr_port_lock);
@@ -620,19 +667,23 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
}
/* Queue packet to local peer socket. */
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
- const struct qrtr_hdr *phdr;
struct qrtr_sock *ipc;
+ struct qrtr_cb *cb;
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
-
- ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
+ ipc = qrtr_port_lookup(to->sq_port);
if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
kfree_skb(skb);
return -ENODEV;
}
+ cb = (struct qrtr_cb *)skb->cb;
+ cb->src_node = from->sq_node;
+ cb->src_port = from->sq_port;
+
if (sock_queue_rcv_skb(&ipc->sk, skb)) {
qrtr_port_put(ipc);
kfree_skb(skb);
@@ -645,7 +696,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
}
/* Queue packet for broadcast. */
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
struct sk_buff *skbn;
@@ -655,11 +708,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
if (!skbn)
break;
skb_set_owner_w(skbn, skb->sk);
- qrtr_node_enqueue(node, skbn);
+ qrtr_node_enqueue(node, skbn, type, from, to);
}
mutex_unlock(&qrtr_node_lock);
- qrtr_local_enqueue(node, skb);
+ qrtr_local_enqueue(node, skb, type, from, to);
return 0;
}
@@ -667,13 +720,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
- int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *);
+ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
+ struct sockaddr_qrtr *, struct sockaddr_qrtr *);
struct qrtr_sock *ipc = qrtr_sk(sock->sk);
struct sock *sk = sock->sk;
struct qrtr_node *node;
- struct qrtr_hdr *hdr;
struct sk_buff *skb;
size_t plen;
+ u32 type = QRTR_TYPE_DATA;
int rc;
if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -722,37 +776,19 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
plen = (len + 3) & ~3;
- skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE,
+ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
msg->msg_flags & MSG_DONTWAIT, &rc);
if (!skb)
goto out_node;
- skb_reset_transport_header(skb);
- skb_put(skb, len + QRTR_HDR_SIZE);
-
- hdr = (struct qrtr_hdr *)skb_transport_header(skb);
- hdr->version = cpu_to_le32(QRTR_PROTO_VER);
- hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
- hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
- hdr->confirm_rx = cpu_to_le32(0);
- hdr->size = cpu_to_le32(len);
- hdr->dst_node_id = cpu_to_le32(addr->sq_node);
- hdr->dst_port_id = cpu_to_le32(addr->sq_port);
+ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
- rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE,
- &msg->msg_iter, len);
+ rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc) {
kfree_skb(skb);
goto out_node;
}
- if (plen != len) {
- rc = skb_pad(skb, plen - len);
- if (rc)
- goto out_node;
- skb_put(skb, plen - len);
- }
-
if (ipc->us.sq_port == QRTR_PORT_CTRL) {
if (len < 4) {
rc = -EINVAL;
@@ -761,12 +797,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
/* control messages already require the type as 'command' */
- skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4);
- } else {
- hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
+ skb_copy_bits(skb, 0, &type, 4);
+ type = le32_to_cpu(type);
}
- rc = enqueue_fn(node, skb);
+ rc = enqueue_fn(node, skb, type, &ipc->us, addr);
if (rc >= 0)
rc = len;
@@ -781,9 +816,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
- const struct qrtr_hdr *phdr;
struct sock *sk = sock->sk;
struct sk_buff *skb;
+ struct qrtr_cb *cb;
int copied, rc;
lock_sock(sk);
@@ -800,22 +835,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
return rc;
}
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
- copied = le32_to_cpu(phdr->size);
+ copied = skb->len;
if (copied > size) {
copied = size;
msg->msg_flags |= MSG_TRUNC;
}
- rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied);
+ rc = skb_copy_datagram_msg(skb, 0, msg, copied);
if (rc < 0)
goto out;
rc = copied;
if (addr) {
+ cb = (struct qrtr_cb *)skb->cb;
addr->sq_family = AF_QIPCRTR;
- addr->sq_node = le32_to_cpu(phdr->src_node_id);
- addr->sq_port = le32_to_cpu(phdr->src_port_id);
+ addr->sq_node = cb->src_node;
+ addr->sq_port = cb->src_port;
msg->msg_namelen = sizeof(*addr);
}
@@ -908,7 +943,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case TIOCINQ:
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- len = skb->len - QRTR_HDR_SIZE;
+ len = skb->len;
rc = put_user(len, (int __user *)argp);
break;
case SIOCGIFADDR:
diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h
index 2b84871..b81e695 100644
--- a/net/qrtr/qrtr.h
+++ b/net/qrtr/qrtr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __QRTR_H_
#define __QRTR_H_
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 56c7d27..b5d568b 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_RDS) += rds.o
rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
recv.o send.o stats.o sysctl.o threads.o transport.o \
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a0954ac..36dd209 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
static void rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
+ bool has_fr, has_fmr;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
@@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_wrs = device->attrs.max_qp_wr;
rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
- rds_ibdev->has_fr = (device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS);
- rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
- rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr);
+ has_fr = (device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ rds_ibdev->use_fastreg = (has_fr && !has_fmr);
rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
diff --git a/net/rds/ib.h b/net/rds/ib.h
index bf48224..a6f4d7d 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_IB_H
#define _RDS_IB_H
@@ -215,8 +216,6 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
- bool has_fmr;
- bool has_fr;
bool use_fastreg;
unsigned int max_mrs;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 9a3c54e..e678699 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
if (pool_type == RDS_IB_MR_1M_POOL) {
/* +1 allows for unaligned MRs */
pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
- pool->max_items = RDS_MR_1M_POOL_SIZE;
+ pool->max_items = rds_ibdev->max_1m_mrs;
} else {
/* pool_type == RDS_IB_MR_8K_POOL */
pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
- pool->max_items = RDS_MR_8K_POOL_SIZE;
+ pool->max_items = rds_ibdev->max_8k_mrs;
}
pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 6ab39db..8557a1c 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
}
}
- rds_ib_set_wr_signal_state(ic, send, 0);
+ rds_ib_set_wr_signal_state(ic, send, false);
/*
* Always signal the last one if we're stopping due to flow control.
*/
- if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
- send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+ if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) {
+ rds_ib_set_wr_signal_state(ic, send, true);
+ send->s_wr.send_flags |= IB_SEND_SOLICITED;
+ }
if (send->s_wr.send_flags & IB_SEND_SIGNALED)
nr_sig++;
@@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_op = ic->i_data_op;
prev->s_wr.send_flags |= IB_SEND_SOLICITED;
- if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) {
- ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
- prev->s_wr.send_flags |= IB_SEND_SIGNALED;
- nr_sig++;
- }
+ if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED))
+ nr_sig += rds_ib_set_wr_signal_state(ic, prev, true);
ic->i_data_op = NULL;
}
@@ -792,6 +791,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
send->s_atomic_wr.swap_mask = 0;
}
+ send->s_wr.send_flags = 0;
nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_atomic_wr.wr.num_sge = 1;
send->s_atomic_wr.wr.next = NULL;
diff --git a/net/rds/info.h b/net/rds/info.h
index b6c052c..a069b51 100644
--- a/net/rds/info.h
+++ b/net/rds/info.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_INFO_H
#define _RDS_INFO_H
diff --git a/net/rds/loop.h b/net/rds/loop.h
index f32b093..469fa4b 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_LOOP_H
#define _RDS_LOOP_H
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index ff2010e..d309c44 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDMA_TRANSPORT_H
#define _RDMA_TRANSPORT_H
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 2e0315b..c349c71 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_RDS_H
#define _RDS_RDS_H
diff --git a/net/rds/rds_single_path.h b/net/rds/rds_single_path.h
index e1241af..9521f6e 100644
--- a/net/rds/rds_single_path.h
+++ b/net/rds/rds_single_path.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_RDS_SINGLE_H
#define _RDS_RDS_SINGLE_H
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f8800b7..1aafbf7 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RDS_TCP_H
#define _RDS_TCP_H
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4a97292..6a5c499 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *);
/*
* Handler for deferred kills.
*/
-static void rose_destroy_timer(unsigned long data)
+static void rose_destroy_timer(struct timer_list *t)
{
- rose_destroy_socket((struct sock *)data);
+ struct sock *sk = from_timer(sk, t, sk_timer);
+
+ rose_destroy_socket(sk);
}
/*
@@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
- setup_timer(&sk->sk_timer, rose_destroy_timer,
- (unsigned long)sk);
+ timer_setup(&sk->sk_timer, rose_destroy_timer, 0);
sk->sk_timer.expires = jiffies + 10 * HZ;
add_timer(&sk->sk_timer);
} else
@@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &rose_proto_ops;
sk->sk_protocol = protocol;
- init_timer(&rose->timer);
- init_timer(&rose->idletimer);
+ timer_setup(&rose->timer, NULL, 0);
+ timer_setup(&rose->idletimer, NULL, 0);
rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout);
rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
@@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk)
sk->sk_state = TCP_ESTABLISHED;
sock_copy_flags(sk, osk);
- init_timer(&rose->timer);
- init_timer(&rose->idletimer);
+ timer_setup(&rose->timer, NULL, 0);
+ timer_setup(&rose->idletimer, NULL, 0);
orose = rose_sk(osk);
rose->t1 = orose->t1;
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 0a63947..9bbbfe3 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety
switch (frametype) {
case ROSE_RESET_REQUEST:
rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
+ /* fall through */
case ROSE_RESET_CONFIRMATION:
rose_stop_timer(sk);
rose_start_idletimer(sk);
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index c76638c..cda4c66 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -27,8 +27,8 @@
#include <linux/interrupt.h>
#include <net/rose.h>
-static void rose_ftimer_expiry(unsigned long);
-static void rose_t0timer_expiry(unsigned long);
+static void rose_ftimer_expiry(struct timer_list *);
+static void rose_t0timer_expiry(struct timer_list *);
static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
static void rose_transmit_restart_request(struct rose_neigh *neigh);
@@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh)
{
del_timer(&neigh->ftimer);
- neigh->ftimer.data = (unsigned long)neigh;
- neigh->ftimer.function = &rose_ftimer_expiry;
+ neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry;
neigh->ftimer.expires =
jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
@@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh)
{
del_timer(&neigh->t0timer);
- neigh->t0timer.data = (unsigned long)neigh;
- neigh->t0timer.function = &rose_t0timer_expiry;
+ neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry;
neigh->t0timer.expires =
jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
@@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh)
return timer_pending(&neigh->t0timer);
}
-static void rose_ftimer_expiry(unsigned long param)
+static void rose_ftimer_expiry(struct timer_list *t)
{
}
-static void rose_t0timer_expiry(unsigned long param)
+static void rose_t0timer_expiry(struct timer_list *t)
{
- struct rose_neigh *neigh = (struct rose_neigh *)param;
+ struct rose_neigh *neigh = from_timer(neigh, t, t0timer);
rose_transmit_restart_request(neigh);
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 3444562..7af4f99 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue;
static struct timer_list loopback_timer;
static void rose_set_loopback_timer(void);
+static void rose_loopback_timer(struct timer_list *unused);
void rose_loopback_init(void)
{
skb_queue_head_init(&loopback_queue);
- init_timer(&loopback_timer);
+ timer_setup(&loopback_timer, rose_loopback_timer, 0);
}
static int rose_loopback_running(void)
@@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
return 1;
}
-static void rose_loopback_timer(unsigned long);
static void rose_set_loopback_timer(void)
{
del_timer(&loopback_timer);
- loopback_timer.data = 0;
- loopback_timer.function = &rose_loopback_timer;
loopback_timer.expires = jiffies + 10;
-
add_timer(&loopback_timer);
}
-static void rose_loopback_timer(unsigned long param)
+static void rose_loopback_timer(struct timer_list *unused)
{
struct sk_buff *skb;
struct net_device *dev;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 452bbb3..8ca3124 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route,
skb_queue_head_init(&rose_neigh->queue);
- init_timer(&rose_neigh->ftimer);
- init_timer(&rose_neigh->t0timer);
+ timer_setup(&rose_neigh->ftimer, NULL, 0);
+ timer_setup(&rose_neigh->t0timer, NULL, 0);
if (rose_route->ndigis != 0) {
rose_neigh->digipeat =
@@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
case 0:
rose_node->neighbour[0] =
rose_node->neighbour[1];
+ /* fall through */
case 1:
rose_node->neighbour[1] =
rose_node->neighbour[2];
@@ -390,8 +391,8 @@ void rose_add_loopback_neigh(void)
skb_queue_head_init(&sn->queue);
- init_timer(&sn->ftimer);
- init_timer(&sn->t0timer);
+ timer_setup(&sn->ftimer, NULL, 0);
+ timer_setup(&sn->t0timer, NULL, 0);
spin_lock_bh(&rose_neigh_list_lock);
sn->next = rose_neigh_list;
@@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev)
switch (i) {
case 0:
t->neighbour[0] = t->neighbour[1];
+ /* fall through */
case 1:
t->neighbour[1] = t->neighbour[2];
case 2:
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index bc5469d..ea613b2 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -28,16 +28,15 @@
#include <linux/interrupt.h>
#include <net/rose.h>
-static void rose_heartbeat_expiry(unsigned long);
-static void rose_timer_expiry(unsigned long);
-static void rose_idletimer_expiry(unsigned long);
+static void rose_heartbeat_expiry(struct timer_list *t);
+static void rose_timer_expiry(struct timer_list *);
+static void rose_idletimer_expiry(struct timer_list *);
void rose_start_heartbeat(struct sock *sk)
{
del_timer(&sk->sk_timer);
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &rose_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
add_timer(&sk->sk_timer);
@@ -49,8 +48,7 @@ void rose_start_t1timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
add_timer(&rose->timer);
@@ -62,8 +60,7 @@ void rose_start_t2timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
add_timer(&rose->timer);
@@ -75,8 +72,7 @@ void rose_start_t3timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
add_timer(&rose->timer);
@@ -88,8 +84,7 @@ void rose_start_hbtimer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
add_timer(&rose->timer);
@@ -102,8 +97,7 @@ void rose_start_idletimer(struct sock *sk)
del_timer(&rose->idletimer);
if (rose->idle > 0) {
- rose->idletimer.data = (unsigned long)sk;
- rose->idletimer.function = &rose_idletimer_expiry;
+ rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
add_timer(&rose->idletimer);
@@ -125,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk)
del_timer(&rose_sk(sk)->idletimer);
}
-static void rose_heartbeat_expiry(unsigned long param)
+static void rose_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct rose_sock *rose = rose_sk(sk);
bh_lock_sock(sk);
@@ -163,10 +157,10 @@ static void rose_heartbeat_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void rose_timer_expiry(unsigned long param)
+static void rose_timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct rose_sock *rose = rose_sk(sk);
+ struct rose_sock *rose = from_timer(rose, t, timer);
+ struct sock *sk = &rose->sock;
bh_lock_sock(sk);
switch (rose->state) {
@@ -192,9 +186,10 @@ static void rose_timer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void rose_idletimer_expiry(unsigned long param)
+static void rose_idletimer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct rose_sock *rose = from_timer(rose, t, idletimer);
+ struct sock *sk = &rose->sock;
bh_lock_sock(sk);
rose_clear_queues(sk);
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 9c68d2f..6ffb7e9 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Linux kernel RxRPC
#
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fb17552..344b2dc 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -246,6 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
ret = 0;
break;
}
+ /* Fall through */
default:
ret = -EBUSY;
break;
@@ -265,6 +266,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @tx_total_len: Total length of data to transmit during the call (or -1)
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
+ * @upgrade: Request service upgrade for call
*
* Allow a kernel service to begin a call on the nominated socket. This just
* sets up all the internal tracking structures and allocates connection and
@@ -279,7 +281,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
unsigned long user_call_ID,
s64 tx_total_len,
gfp_t gfp,
- rxrpc_notify_rx_t notify_rx)
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
@@ -304,14 +307,16 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.key = key;
cp.security_level = 0;
cp.exclusive = false;
+ cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
gfp);
/* The socket has been unlocked. */
- if (!IS_ERR(call))
+ if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
+ mutex_unlock(&call->user_mutex);
+ }
- mutex_unlock(&call->user_mutex);
_leave(" = %p", call);
return call;
}
@@ -337,6 +342,25 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
EXPORT_SYMBOL(rxrpc_kernel_end_call);
/**
+ * rxrpc_kernel_check_life - Check to see whether a call is still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * Allow a kernel service to find out whether a call is still alive - ie. we're
+ * getting ACKs from the server. Returns a number representing the life state
+ * which can be compared to that returned by a previous call.
+ *
+ * If this is a client call, ping ACKs will be sent to the server to find out
+ * whether it's still responsive and whether the call is still alive on the
+ * server.
+ */
+u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->acks_latest;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_life);
+
+/**
* rxrpc_kernel_check_call - Check a call's state
* @sock: The socket the call is on
* @call: The call to check
@@ -537,6 +561,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
m->msg_name = &rx->connect_srx;
m->msg_namelen = sizeof(rx->connect_srx);
}
+ /* Fall through */
case RXRPC_SERVER_BOUND:
case RXRPC_SERVER_LISTENING:
ret = rxrpc_do_sendmsg(rx, m, len);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index e56e23e..1e37eb1 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1125,6 +1125,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
case RXRPC_PACKET_TYPE_BUSY:
if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
goto discard;
+ /* Fall through */
case RXRPC_PACKET_TYPE_DATA:
if (sp->hdr.callNumber == 0)
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5787f97..d02a99f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
*_srx = call->peer->srx;
}
EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ *
+ * Get the call's peer RTT.
+ */
+u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->peer->rtt;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index bdece21..e4937b3 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -607,6 +607,7 @@ wait_error:
* @_offset: The running offset into the buffer.
* @want_more: True if more data is expected to be read
* @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ * @_service: Where to store the actual service ID (may be upgraded)
*
* Allow a kernel service to receive data and pick up information about the
* state of a call. Returns 0 if got what was asked for and there's more
@@ -624,7 +625,7 @@ wait_error:
*/
int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
void *buf, size_t size, size_t *_offset,
- bool want_more, u32 *_abort)
+ bool want_more, u32 *_abort, u16 *_service)
{
struct iov_iter iter;
struct kvec iov;
@@ -680,6 +681,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
+ if (_service)
+ *_service = call->service_id;
mutex_unlock(&call->user_mutex);
_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
return ret;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 9ea6f97..7d25955 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,12 +38,86 @@ struct rxrpc_send_params {
};
/*
+ * Wait for space to appear in the Tx queue or a signal to occur.
+ */
+static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (call->tx_top - call->tx_hard_ack <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (signal_pending(current))
+ return sock_intr_errno(*timeo);
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ mutex_unlock(&call->user_mutex);
+ *timeo = schedule_timeout(*timeo);
+ if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ return sock_intr_errno(*timeo);
+ }
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly, but with
+ * a timeout of 2*RTT if no progress was made and a signal occurred.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call)
+{
+ rxrpc_seq_t tx_start, tx_win;
+ signed long rtt2, timeout;
+ u64 rtt;
+
+ rtt = READ_ONCE(call->peer->rtt);
+ rtt2 = nsecs_to_jiffies64(rtt) * 2;
+ if (rtt2 < 1)
+ rtt2 = 1;
+
+ timeout = rtt2;
+ tx_start = READ_ONCE(call->tx_hard_ack);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ tx_win = READ_ONCE(call->tx_hard_ack);
+ if (call->tx_top - tx_win <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (timeout == 0 &&
+ tx_win == tx_start && signal_pending(current))
+ return -EINTR;
+
+ if (tx_win != tx_start) {
+ timeout = rtt2;
+ tx_start = tx_win;
+ }
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ timeout = schedule_timeout(timeout);
+ }
+}
+
+/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
struct rxrpc_call *call,
- long *timeo)
+ long *timeo,
+ bool waitall)
{
DECLARE_WAITQUEUE(myself, current);
int ret;
@@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
add_wait_queue(&call->waitq, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- ret = 0;
- if (call->tx_top - call->tx_hard_ack <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
- break;
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = call->error;
- break;
- }
- if (signal_pending(current)) {
- ret = sock_intr_errno(*timeo);
- break;
- }
-
- trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
- *timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0) {
- ret = sock_intr_errno(*timeo);
- break;
- }
- }
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
@@ -166,6 +220,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
ktime_get_real());
if (!last)
break;
+ /* Fall through */
case RXRPC_CALL_SERVER_SEND_REPLY:
call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
rxrpc_notify_end_tx(rx, call, notify_end_tx);
@@ -254,7 +309,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
if (msg->msg_flags & MSG_DONTWAIT)
goto maybe_error;
ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo);
+ &timeo,
+ msg->msg_flags & MSG_WAITALL);
if (ret < 0)
goto maybe_error;
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e70ed26..c03d86a 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -172,6 +172,17 @@ config NET_SCH_TBF
To compile this code as a module, choose M here: the
module will be called sch_tbf.
+config NET_SCH_CBS
+ tristate "Credit Based Shaper (CBS)"
+ ---help---
+ Say Y here if you want to use the Credit Based Shaper (CBS) packet
+ scheduling algorithm.
+
+ See the top of <file:net/sched/sch_cbs.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_cbs.
+
config NET_SCH_GRED
tristate "Generic Random Early Detection (GRED)"
---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7b915d2..5b63544 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux Traffic Control Unit.
#
@@ -52,6 +53,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o
obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o
+obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index da6fa82..ff4d6908 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,6 +21,8 @@
#include <linux/kmod.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/rhashtable.h>
+#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
@@ -78,6 +80,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
spin_lock_bh(&idrinfo->lock);
idr_remove_ext(&idrinfo->action_idr, p->tcfa_index);
spin_unlock_bh(&idrinfo->lock);
+ put_net(idrinfo->net);
gen_kill_estimator(&p->tcfa_rate_est);
free_tcf(p);
}
@@ -86,6 +89,8 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
{
int ret = 0;
+ ASSERT_RTNL();
+
if (p) {
if (bind)
p->tcfa_bindcnt--;
@@ -334,6 +339,7 @@ err3:
p->idrinfo = idrinfo;
p->ops = ops;
INIT_LIST_HEAD(&p->list);
+ get_net(idrinfo->net);
*a = p;
return 0;
}
@@ -1249,8 +1255,227 @@ out_module_put:
return skb->len;
}
+struct tcf_action_net {
+ struct rhashtable egdev_ht;
+};
+
+static unsigned int tcf_action_net_id;
+
+struct tcf_action_egdev_cb {
+ struct list_head list;
+ tc_setup_cb_t *cb;
+ void *cb_priv;
+};
+
+struct tcf_action_egdev {
+ struct rhash_head ht_node;
+ const struct net_device *dev;
+ unsigned int refcnt;
+ struct list_head cb_list;
+};
+
+static const struct rhashtable_params tcf_action_egdev_ht_params = {
+ .key_offset = offsetof(struct tcf_action_egdev, dev),
+ .head_offset = offsetof(struct tcf_action_egdev, ht_node),
+ .key_len = sizeof(const struct net_device *),
+};
+
+static struct tcf_action_egdev *
+tcf_action_egdev_lookup(const struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
+ tcf_action_egdev_ht_params);
+}
+
+static struct tcf_action_egdev *
+tcf_action_egdev_get(const struct net_device *dev)
+{
+ struct tcf_action_egdev *egdev;
+ struct tcf_action_net *tan;
+
+ egdev = tcf_action_egdev_lookup(dev);
+ if (egdev)
+ goto inc_ref;
+
+ egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
+ if (!egdev)
+ return NULL;
+ INIT_LIST_HEAD(&egdev->cb_list);
+ egdev->dev = dev;
+ tan = net_generic(dev_net(dev), tcf_action_net_id);
+ rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
+ tcf_action_egdev_ht_params);
+
+inc_ref:
+ egdev->refcnt++;
+ return egdev;
+}
+
+static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
+{
+ struct tcf_action_net *tan;
+
+ if (--egdev->refcnt)
+ return;
+ tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
+ rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
+ tcf_action_egdev_ht_params);
+ kfree(egdev);
+}
+
+static struct tcf_action_egdev_cb *
+tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ list_for_each_entry(egdev_cb, &egdev->cb_list, list)
+ if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
+ return egdev_cb;
+ return NULL;
+}
+
+static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
+ enum tc_setup_type type,
+ void *type_data, bool err_stop)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+ int ok_count = 0;
+ int err;
+
+ list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
+ err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
+ }
+ return ok_count;
+}
+
+static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+ if (WARN_ON(egdev_cb))
+ return -EEXIST;
+ egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
+ if (!egdev_cb)
+ return -ENOMEM;
+ egdev_cb->cb = cb;
+ egdev_cb->cb_priv = cb_priv;
+ list_add(&egdev_cb->list, &egdev->cb_list);
+ return 0;
+}
+
+static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+ if (WARN_ON(!egdev_cb))
+ return;
+ list_del(&egdev_cb->list);
+ kfree(egdev_cb);
+}
+
+static int __tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
+ int err;
+
+ if (!egdev)
+ return -ENOMEM;
+ err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
+ if (err)
+ goto err_cb_add;
+ return 0;
+
+err_cb_add:
+ tcf_action_egdev_put(egdev);
+ return err;
+}
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
+
+static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+ if (WARN_ON(!egdev))
+ return;
+ tcf_action_egdev_cb_del(egdev, cb, cb_priv);
+ tcf_action_egdev_put(egdev);
+}
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ rtnl_lock();
+ __tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
+
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+ enum tc_setup_type type, void *type_data,
+ bool err_stop)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+ if (!egdev)
+ return 0;
+ return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
+
+static __net_init int tcf_action_net_init(struct net *net)
+{
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
+}
+
+static void __net_exit tcf_action_net_exit(struct net *net)
+{
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ rhashtable_destroy(&tan->egdev_ht);
+}
+
+static struct pernet_operations tcf_action_net_ops = {
+ .init = tcf_action_net_init,
+ .exit = tcf_action_net_exit,
+ .id = &tcf_action_net_id,
+ .size = sizeof(struct tcf_action_net),
+};
+
static int __init tc_action_init(void)
{
+ int err;
+
+ err = register_pernet_subsys(&tcf_action_net_ops);
+ if (err)
+ return err;
+
rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5ef8ce8..035ed268 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
- return tc_action_net_init(tn, &act_bpf_ops);
+ return tc_action_net_init(tn, &act_bpf_ops, net);
}
static void __net_exit bpf_exit_net(struct net *net)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 10b7a88..34e52d0 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
- return tc_action_net_init(tn, &act_connmark_ops);
+ return tc_action_net_init(tn, &act_connmark_ops, net);
}
static void __net_exit connmark_exit_net(struct net *net)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 1c40caa..35171df 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
- return tc_action_net_init(tn, &act_csum_ops);
+ return tc_action_net_init(tn, &act_csum_ops, net);
}
static void __net_exit csum_exit_net(struct net *net)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e29a48e..ef7f7f3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
- return tc_action_net_init(tn, &act_gact_ops);
+ return tc_action_net_init(tn, &act_gact_ops, net);
}
static void __net_exit gact_exit_net(struct net *net)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 8ccd358..eca2725 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -248,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
return ret;
}
+#ifdef CONFIG_MODULES
+static const char *ife_meta_id2name(u32 metaid)
+{
+ switch (metaid) {
+ case IFE_META_SKBMARK:
+ return "skbmark";
+ case IFE_META_PRIO:
+ return "skbprio";
+ case IFE_META_TCINDEX:
+ return "tcindex";
+ default:
+ return "unknown";
+ }
+}
+#endif
+
/* called when adding new meta information
* under ife->tcf_lock for existing action
*/
@@ -263,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
if (exists)
spin_unlock_bh(&ife->tcf_lock);
rtnl_unlock();
- request_module("ifemeta%u", metaid);
+ request_module("ife-meta-%s", ife_meta_id2name(metaid));
rtnl_lock();
if (exists)
spin_lock_bh(&ife->tcf_lock);
@@ -392,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
static void tcf_ife_cleanup(struct tc_action *a, int bind)
{
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p;
spin_lock_bh(&ife->tcf_lock);
_tcf_ife_cleanup(a, bind);
spin_unlock_bh(&ife->tcf_lock);
+
+ p = rcu_dereference_protected(ife->params, 1);
+ kfree_rcu(p, rcu);
}
/* under ife->tcf_lock for existing action */
@@ -432,6 +452,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
+ struct tcf_ife_params *p, *p_old;
struct tcf_ife_info *ife;
u16 ife_type = ETH_P_IFE;
struct tc_ife *parm;
@@ -450,24 +471,41 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_IFE_PARMS]);
+ /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because
+ * they cannot run as the same time. Check on all other values which
+ * are not supported right now.
+ */
+ if (parm->flags & ~IFE_ENCODE)
+ return -EINVAL;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
exists = tcf_idr_check(tn, parm->index, a, bind);
- if (exists && bind)
+ if (exists && bind) {
+ kfree(p);
return 0;
+ }
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
- bind, false);
- if (ret)
+ bind, true);
+ if (ret) {
+ kfree(p);
return ret;
+ }
ret = ACT_P_CREATED;
} else {
tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ kfree(p);
return -EEXIST;
+ }
}
ife = to_ife(*a);
- ife->flags = parm->flags;
+ p->flags = parm->flags;
if (parm->flags & IFE_ENCODE) {
if (tb[TCA_IFE_TYPE])
@@ -478,24 +516,25 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
- if (exists)
- spin_lock_bh(&ife->tcf_lock);
ife->tcf_action = parm->action;
if (parm->flags & IFE_ENCODE) {
if (daddr)
- ether_addr_copy(ife->eth_dst, daddr);
+ ether_addr_copy(p->eth_dst, daddr);
else
- eth_zero_addr(ife->eth_dst);
+ eth_zero_addr(p->eth_dst);
if (saddr)
- ether_addr_copy(ife->eth_src, saddr);
+ ether_addr_copy(p->eth_src, saddr);
else
- eth_zero_addr(ife->eth_src);
+ eth_zero_addr(p->eth_src);
- ife->eth_type = ife_type;
+ p->eth_type = ife_type;
}
+ if (exists)
+ spin_lock_bh(&ife->tcf_lock);
+
if (ret == ACT_P_CREATED)
INIT_LIST_HEAD(&ife->metalist);
@@ -511,6 +550,7 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ kfree(p);
return err;
}
@@ -531,6 +571,7 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ kfree(p);
return err;
}
}
@@ -538,6 +579,11 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ p_old = rtnl_dereference(ife->params);
+ rcu_assign_pointer(ife->params, p);
+ if (p_old)
+ kfree_rcu(p_old, rcu);
+
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -549,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p = rtnl_dereference(ife->params);
struct tc_ife opt = {
.index = ife->tcf_index,
.refcnt = ife->tcf_refcnt - ref,
.bindcnt = ife->tcf_bindcnt - bind,
.action = ife->tcf_action,
- .flags = ife->flags,
+ .flags = p->flags,
};
struct tcf_t t;
@@ -565,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
goto nla_put_failure;
- if (!is_zero_ether_addr(ife->eth_dst)) {
- if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst))
+ if (!is_zero_ether_addr(p->eth_dst)) {
+ if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst))
goto nla_put_failure;
}
- if (!is_zero_ether_addr(ife->eth_src)) {
- if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src))
+ if (!is_zero_ether_addr(p->eth_src)) {
+ if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src))
goto nla_put_failure;
}
- if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type))
+ if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
goto nla_put_failure;
if (dump_metalist(skb, ife)) {
@@ -617,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
u8 *tlv_data;
u16 metalen;
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
+ bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
- spin_unlock(&ife->tcf_lock);
if (skb_at_tc_ingress(skb))
skb_push(skb, skb->dev->hard_header_len);
tlv_data = ife_decode(skb, &metalen);
if (unlikely(!tlv_data)) {
- spin_lock(&ife->tcf_lock);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -647,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
*/
pr_info_ratelimited("Unknown metaid %d dlen %d\n",
mtype, dlen);
- ife->tcf_qstats.overlimits++;
+ qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
}
}
if (WARN_ON(tlv_data != ifehdr_end)) {
- spin_lock(&ife->tcf_lock);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -683,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
}
static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+ struct tcf_result *res, struct tcf_ife_params *p)
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
@@ -706,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
exceed_mtu = true;
}
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
+ bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
if (!metalen) { /* no metadata to send */
/* abuse overlimits to count when we allow packet
* with no metadata
*/
- ife->tcf_qstats.overlimits++;
- spin_unlock(&ife->tcf_lock);
+ qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
return action;
}
/* could be stupid policy setup or mtu config
* so lets be conservative.. */
if ((action == TC_ACT_SHOT) || exceed_mtu) {
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -731,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
ife_meta = ife_encode(skb, metalen);
+ spin_lock(&ife->tcf_lock);
+
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
* ops->presence_check...
@@ -742,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
}
if (err < 0) {
/* too corrupt to keep around if overwritten */
- ife->tcf_qstats.drops++;
spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
skboff += err;
}
+ spin_unlock(&ife->tcf_lock);
oethh = (struct ethhdr *)skb->data;
- if (!is_zero_ether_addr(ife->eth_src))
- ether_addr_copy(oethh->h_source, ife->eth_src);
- if (!is_zero_ether_addr(ife->eth_dst))
- ether_addr_copy(oethh->h_dest, ife->eth_dst);
- oethh->h_proto = htons(ife->eth_type);
+ if (!is_zero_ether_addr(p->eth_src))
+ ether_addr_copy(oethh->h_source, p->eth_src);
+ if (!is_zero_ether_addr(p->eth_dst))
+ ether_addr_copy(oethh->h_dest, p->eth_dst);
+ oethh->h_proto = htons(p->eth_type);
if (skb_at_tc_ingress(skb))
skb_pull(skb, skb->dev->hard_header_len);
- spin_unlock(&ife->tcf_lock);
-
return action;
}
@@ -768,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p;
+ int ret;
+
+ rcu_read_lock();
+ p = rcu_dereference(ife->params);
+ if (p->flags & IFE_ENCODE) {
+ ret = tcf_ife_encode(skb, a, res, p);
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
- if (ife->flags & IFE_ENCODE)
- return tcf_ife_encode(skb, a, res);
-
- if (!(ife->flags & IFE_ENCODE))
- return tcf_ife_decode(skb, a, res);
-
- pr_info_ratelimited("unknown failure(policy neither de/encode\n");
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
- tcf_lastuse_update(&ife->tcf_tm);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
-
- return TC_ACT_SHOT;
+ return tcf_ife_decode(skb, a, res);
}
static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
@@ -818,7 +855,7 @@ static __net_init int ife_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
- return tc_action_net_init(tn, &act_ife_ops);
+ return tc_action_net_init(tn, &act_ife_ops, net);
}
static void __net_exit ife_exit_net(struct net *net)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d9e399a..dbdf3b2 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, ipt_net_id);
- return tc_action_net_init(tn, &act_ipt_ops);
+ return tc_action_net_init(tn, &act_ipt_ops, net);
}
static void __net_exit ipt_exit_net(struct net *net)
@@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, xt_net_id);
- return tc_action_net_init(tn, &act_xt_ops);
+ return tc_action_net_init(tn, &act_xt_ops, net);
}
static void __net_exit xt_exit_net(struct net *net)
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 8289217..1e3f10e 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2015)");
MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBMARK);
+MODULE_ALIAS_IFE_META("skbmark");
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
index 26bf4d8..4033f9f 100644
--- a/net/sched/act_meta_skbprio.c
+++ b/net/sched/act_meta_skbprio.c
@@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2015)");
MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_PRIO);
+MODULE_ALIAS_IFE_META("skbprio");
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 3b35774..2ea1f26 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2016)");
MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
+MODULE_ALIAS_IFE_META("tcindex");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 416627c..8df5775 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -140,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
m->tcfm_ifindex = parm->ifindex;
+ m->net = net;
if (ret != ACT_P_CREATED)
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
@@ -313,15 +314,11 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
-static int tcf_mirred_device(const struct tc_action *a, struct net *net,
- struct net_device **mirred_dev)
+static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
{
- int ifindex = tcf_mirred_ifindex(a);
+ struct tcf_mirred *m = to_mirred(a);
- *mirred_dev = __dev_get_by_index(net, ifindex);
- if (!*mirred_dev)
- return -EINVAL;
- return 0;
+ return __dev_get_by_index(m->net, m->tcfm_ifindex);
}
static struct tc_action_ops act_mirred_ops = {
@@ -336,14 +333,14 @@ static struct tc_action_ops act_mirred_ops = {
.walk = tcf_mirred_walker,
.lookup = tcf_mirred_search,
.size = sizeof(struct tcf_mirred),
- .get_dev = tcf_mirred_device,
+ .get_dev = tcf_mirred_get_dev,
};
static __net_init int mirred_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
- return tc_action_net_init(tn, &act_mirred_ops);
+ return tc_action_net_init(tn, &act_mirred_ops, net);
}
static void __net_exit mirred_exit_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c365d01..7eeaaf9 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
- return tc_action_net_init(tn, &act_nat_ops);
+ return tc_action_net_init(tn, &act_nat_ops, net);
}
static void __net_exit nat_exit_net(struct net *net)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 491fe5d..b3d82c3 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
- return tc_action_net_init(tn, &act_pedit_ops);
+ return tc_action_net_init(tn, &act_pedit_ops, net);
}
static void __net_exit pedit_exit_net(struct net *net)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 3bb2ebf..9ec42b2 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, police_net_id);
- return tc_action_net_init(tn, &act_police_ops);
+ return tc_action_net_init(tn, &act_police_ops, net);
}
static void __net_exit police_exit_net(struct net *net)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index ec986ae..e69a1e3 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
- return tc_action_net_init(tn, &act_sample_ops);
+ return tc_action_net_init(tn, &act_sample_ops, net);
}
static void __net_exit sample_exit_net(struct net *net)
@@ -264,12 +264,13 @@ static int __init sample_init_module(void)
static void __exit sample_cleanup_module(void)
{
+ rcu_barrier();
tcf_unregister_action(&act_sample_ops, &sample_net_ops);
}
module_init(sample_init_module);
module_exit(sample_cleanup_module);
-MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>");
+MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
MODULE_DESCRIPTION("Packet sampling action");
MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7b57e5..a8d0ea9 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
- return tc_action_net_init(tn, &act_simp_ops);
+ return tc_action_net_init(tn, &act_simp_ops, net);
}
static void __net_exit simp_exit_net(struct net *net)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 59949d6..fbac624 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
- return tc_action_net_init(tn, &act_skbedit_ops);
+ return tc_action_net_init(tn, &act_skbedit_ops, net);
}
static void __net_exit skbedit_exit_net(struct net *net)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index b642ad3..8e12d88 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
- return tc_action_net_init(tn, &act_skbmod_ops);
+ return tc_action_net_init(tn, &act_skbmod_ops, net);
}
static void __net_exit skbmod_exit_net(struct net *net)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 30c9627..c33faa3 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
- return tc_action_net_init(tn, &act_tunnel_key_ops);
+ return tc_action_net_init(tn, &act_tunnel_key_ops, net);
}
static void __net_exit tunnel_key_exit_net(struct net *net)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 16eb067..115fc33 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -269,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
- return tc_action_net_init(tn, &act_vlan_ops);
+ return tc_action_net_init(tn, &act_vlan_ops, net);
}
static void __net_exit vlan_exit_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219a..206e19f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -77,6 +77,8 @@ out:
}
EXPORT_SYMBOL(register_tcf_proto_ops);
+static struct workqueue_struct *tc_filter_wq;
+
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
{
struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
* tcf_proto_ops's destroy() handler.
*/
rcu_barrier();
+ flush_workqueue(tc_filter_wq);
write_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
}
EXPORT_SYMBOL(unregister_tcf_proto_ops);
+bool tcf_queue_work(struct work_struct *work)
+{
+ return queue_work(tc_filter_wq, work);
+}
+EXPORT_SYMBOL(tcf_queue_work);
+
/* Select new prio value from the range, managed by kernel. */
static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -186,12 +195,19 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
return chain;
}
+static void tcf_chain_head_change(struct tcf_chain *chain,
+ struct tcf_proto *tp_head)
+{
+ if (chain->chain_head_change)
+ chain->chain_head_change(tp_head,
+ chain->chain_head_change_priv);
+}
+
static void tcf_chain_flush(struct tcf_chain *chain)
{
struct tcf_proto *tp;
- if (chain->p_filter_chain)
- RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
+ tcf_chain_head_change(chain, NULL);
while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
tcf_chain_put(chain);
@@ -233,15 +249,35 @@ void tcf_chain_put(struct tcf_chain *chain)
}
EXPORT_SYMBOL(tcf_chain_put);
-static void
-tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
- struct tcf_proto __rcu **p_filter_chain)
+static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei,
+ enum tc_block_command command)
{
- chain->p_filter_chain = p_filter_chain;
+ struct net_device *dev = q->dev_queue->dev;
+ struct tc_block_offload bo = {};
+
+ if (!dev->netdev_ops->ndo_setup_tc)
+ return;
+ bo.command = command;
+ bo.binder_type = ei->binder_type;
+ bo.block = block;
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
}
-int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain)
+static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
+}
+
+static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
+}
+
+int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
{
struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
struct tcf_chain *chain;
@@ -250,13 +286,20 @@ int tcf_block_get(struct tcf_block **p_block,
if (!block)
return -ENOMEM;
INIT_LIST_HEAD(&block->chain_list);
+ INIT_LIST_HEAD(&block->cb_list);
+
/* Create chain 0 by default, it has to be always present. */
chain = tcf_chain_create(block, 0);
if (!chain) {
err = -ENOMEM;
goto err_chain_create;
}
- tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+ WARN_ON(!ei->chain_head_change);
+ chain->chain_head_change = ei->chain_head_change;
+ chain->chain_head_change_priv = ei->chain_head_change_priv;
+ block->net = qdisc_net(q);
+ block->q = q;
+ tcf_block_offload_bind(block, q, ei);
*p_block = block;
return 0;
@@ -264,43 +307,178 @@ err_chain_create:
kfree(block);
return err;
}
+EXPORT_SYMBOL(tcf_block_get_ext);
+
+static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
+{
+ struct tcf_proto __rcu **p_filter_chain = priv;
+
+ rcu_assign_pointer(*p_filter_chain, tp_head);
+}
+
+int tcf_block_get(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
+{
+ struct tcf_block_ext_info ei = {
+ .chain_head_change = tcf_chain_head_change_dflt,
+ .chain_head_change_priv = p_filter_chain,
+ };
+
+ WARN_ON(!p_filter_chain);
+ return tcf_block_get_ext(p_block, q, &ei);
+}
EXPORT_SYMBOL(tcf_block_get);
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
+{
+ struct tcf_block *block = container_of(work, struct tcf_block, work);
+ struct tcf_chain *chain, *tmp;
+
+ rtnl_lock();
+ /* Only chain 0 should be still here. */
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_put(chain);
+ rtnl_unlock();
+ kfree(block);
+}
+
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are now
+ * destroyed in tc filter workqueue with RTNL lock, they can not race here.
+ */
+void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
{
struct tcf_chain *chain, *tmp;
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ tcf_chain_flush(chain);
+
+ tcf_block_offload_unbind(block, q, ei);
+
+ INIT_WORK(&block->work, tcf_block_put_final);
+ /* Wait for existing RCU callbacks to cool down, make sure their works
+ * have been queued before this. We can not flush pending works here
+ * because we are holding the RTNL lock.
+ */
+ rcu_barrier();
+ tcf_queue_work(&block->work);
+}
+EXPORT_SYMBOL(tcf_block_put_ext);
+
+void tcf_block_put(struct tcf_block *block)
+{
+ struct tcf_block_ext_info ei = {0, };
+
if (!block)
return;
+ tcf_block_put_ext(block, block->q, &ei);
+}
- /* XXX: Standalone actions are not allowed to jump to any chain, and
- * bound actions should be all removed after flushing. However,
- * filters are destroyed in RCU callbacks, we have to hold the chains
- * first, otherwise we would always race with RCU callbacks on this list
- * without proper locking.
- */
+EXPORT_SYMBOL(tcf_block_put);
- /* Wait for existing RCU callbacks to cool down. */
- rcu_barrier();
+struct tcf_block_cb {
+ struct list_head list;
+ tc_setup_cb_t *cb;
+ void *cb_ident;
+ void *cb_priv;
+ unsigned int refcnt;
+};
- /* Hold a refcnt for all chains, except 0, in case they are gone. */
- list_for_each_entry(chain, &block->chain_list, list)
- if (chain->index)
- tcf_chain_hold(chain);
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
+{
+ return block_cb->cb_priv;
+}
+EXPORT_SYMBOL(tcf_block_cb_priv);
- /* No race on the list, because no chain could be destroyed. */
- list_for_each_entry(chain, &block->chain_list, list)
- tcf_chain_flush(chain);
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{ struct tcf_block_cb *block_cb;
- /* Wait for RCU callbacks to release the reference count. */
- rcu_barrier();
+ list_for_each_entry(block_cb, &block->cb_list, list)
+ if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
+ return block_cb;
+ return NULL;
+}
+EXPORT_SYMBOL(tcf_block_cb_lookup);
- /* At this point, all the chains should have refcnt == 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
- tcf_chain_put(chain);
- kfree(block);
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
+{
+ block_cb->refcnt++;
+}
+EXPORT_SYMBOL(tcf_block_cb_incref);
+
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
+{
+ return --block_cb->refcnt;
+}
+EXPORT_SYMBOL(tcf_block_cb_decref);
+
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
+ if (!block_cb)
+ return NULL;
+ block_cb->cb = cb;
+ block_cb->cb_ident = cb_ident;
+ block_cb->cb_priv = cb_priv;
+ list_add(&block_cb->list, &block->cb_list);
+ return block_cb;
+}
+EXPORT_SYMBOL(__tcf_block_cb_register);
+
+int tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
+ return block_cb ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(tcf_block_cb_register);
+
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+{
+ list_del(&block_cb->list);
+ kfree(block_cb);
+}
+EXPORT_SYMBOL(__tcf_block_cb_unregister);
+
+void tcf_block_cb_unregister(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
+ if (!block_cb)
+ return;
+ __tcf_block_cb_unregister(block_cb);
+}
+EXPORT_SYMBOL(tcf_block_cb_unregister);
+
+static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop)
+{
+ struct tcf_block_cb *block_cb;
+ int ok_count = 0;
+ int err;
+
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
+ }
+ return ok_count;
}
-EXPORT_SYMBOL(tcf_block_put);
/* Main classifier routine: scans classifier chain attached
* to this qdisc, (optionally) tests for protocol and asks
@@ -370,9 +548,8 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
struct tcf_proto *tp)
{
- if (chain->p_filter_chain &&
- *chain_info->pprev == chain->filter_chain)
- rcu_assign_pointer(*chain->p_filter_chain, tp);
+ if (*chain_info->pprev == chain->filter_chain)
+ tcf_chain_head_change(chain, tp);
RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
tcf_chain_hold(chain);
@@ -384,8 +561,8 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
{
struct tcf_proto *next = rtnl_dereference(chain_info->next);
- if (chain->p_filter_chain && tp == chain->filter_chain)
- RCU_INIT_POINTER(*chain->p_filter_chain, next);
+ if (tp == chain->filter_chain)
+ tcf_chain_head_change(chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next);
tcf_chain_put(chain);
}
@@ -418,8 +595,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
}
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
- struct tcf_proto *tp, void *fh, u32 portid,
- u32 seq, u16 flags, int event)
+ struct tcf_proto *tp, struct Qdisc *q, u32 parent,
+ void *fh, u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -432,8 +609,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
- tcm->tcm_parent = tp->classid;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = parent;
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
goto nla_put_failure;
@@ -456,6 +633,7 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
+ struct Qdisc *q, u32 parent,
void *fh, int event, bool unicast)
{
struct sk_buff *skb;
@@ -465,7 +643,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
n->nlmsg_flags, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
@@ -480,6 +658,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
+ struct Qdisc *q, u32 parent,
void *fh, bool unicast, bool *last)
{
struct sk_buff *skb;
@@ -490,7 +669,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
kfree_skb(skb);
return -EINVAL;
@@ -510,6 +689,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
}
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
+ struct Qdisc *q, u32 parent,
struct nlmsghdr *n,
struct tcf_chain *chain, int event)
{
@@ -517,7 +697,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (tp = rtnl_dereference(chain->filter_chain);
tp; tp = rtnl_dereference(tp->next))
- tfilter_notify(net, oskb, n, tp, 0, event, false);
+ tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
}
/* Add/change/delete/get a filter node */
@@ -636,7 +816,8 @@ replay:
}
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
- tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
+ tfilter_notify_chain(net, skb, q, parent, n,
+ chain, RTM_DELTFILTER);
tcf_chain_flush(chain);
err = 0;
goto errout;
@@ -683,7 +864,7 @@ replay:
if (!fh) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, fh,
+ tfilter_notify(net, skb, n, tp, q, parent, fh,
RTM_DELTFILTER, false);
tcf_proto_destroy(tp);
err = 0;
@@ -708,8 +889,8 @@ replay:
}
break;
case RTM_DELTFILTER:
- err = tfilter_del_notify(net, skb, n, tp, fh, false,
- &last);
+ err = tfilter_del_notify(net, skb, n, tp, q, parent,
+ fh, false, &last);
if (err)
goto errout;
if (last) {
@@ -718,7 +899,7 @@ replay:
}
goto errout;
case RTM_GETTFILTER:
- err = tfilter_notify(net, skb, n, tp, fh,
+ err = tfilter_notify(net, skb, n, tp, q, parent, fh,
RTM_NEWTFILTER, true);
goto errout;
default:
@@ -732,7 +913,8 @@ replay:
if (err == 0) {
if (tp_created)
tcf_chain_tp_insert(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
+ tfilter_notify(net, skb, n, tp, q, parent, fh,
+ RTM_NEWTFILTER, false);
} else {
if (tp_created)
tcf_proto_destroy(tp);
@@ -751,6 +933,8 @@ struct tcf_dump_args {
struct tcf_walker w;
struct sk_buff *skb;
struct netlink_callback *cb;
+ struct Qdisc *q;
+ u32 parent;
};
static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -758,13 +942,14 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
struct tcf_dump_args *a = (void *)arg;
struct net *net = sock_net(a->skb->sk);
- return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
+ return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
+ n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER);
}
-static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
- struct netlink_callback *cb,
+static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
+ struct sk_buff *skb, struct netlink_callback *cb,
long index_start, long *p_index)
{
struct net *net = sock_net(skb->sk);
@@ -786,7 +971,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, 0,
+ if (tcf_fill_node(net, skb, tp, q, parent, 0,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
@@ -799,6 +984,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
arg.w.fn = tcf_node_dump;
arg.skb = skb;
arg.cb = cb;
+ arg.q = q;
+ arg.parent = parent;
arg.w.stop = 0;
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
@@ -824,6 +1011,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
const struct Qdisc_class_ops *cops;
long index_start;
long index;
+ u32 parent;
int err;
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
@@ -837,10 +1025,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
return skb->len;
- if (!tcm->tcm_parent)
+ parent = tcm->tcm_parent;
+ if (!parent) {
q = dev->qdisc;
- else
+ parent = q->handle;
+ } else {
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ }
if (!q)
goto out;
cops = q->ops->cl_ops;
@@ -864,7 +1055,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
- if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
+ if (!tcf_chain_dump(chain, q, parent, skb, cb,
+ index_start, &index))
break;
}
@@ -879,6 +1071,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
#ifdef CONFIG_NET_CLS_ACT
LIST_HEAD(actions);
+ ASSERT_RTNL();
tcf_exts_to_list(exts, &actions);
tcf_action_destroy(&actions, TCA_ACT_UNBIND);
kfree(exts->actions);
@@ -1004,32 +1197,63 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
- struct net_device **hw_dev)
+static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
+ enum tc_setup_type type,
+ void *type_data, bool err_stop)
{
+ int ok_count = 0;
#ifdef CONFIG_NET_CLS_ACT
const struct tc_action *a;
- LIST_HEAD(actions);
+ struct net_device *dev;
+ int i, ret;
if (!tcf_exts_has_actions(exts))
- return -EINVAL;
+ return 0;
- tcf_exts_to_list(exts, &actions);
- list_for_each_entry(a, &actions, list) {
- if (a->ops->get_dev) {
- a->ops->get_dev(a, dev_net(dev), hw_dev);
- break;
- }
+ for (i = 0; i < exts->nr_actions; i++) {
+ a = exts->actions[i];
+ if (!a->ops->get_dev)
+ continue;
+ dev = a->ops->get_dev(a);
+ if (!dev)
+ continue;
+ ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count += ret;
}
- if (*hw_dev)
- return 0;
#endif
- return -EOPNOTSUPP;
+ return ok_count;
}
-EXPORT_SYMBOL(tcf_exts_get_dev);
+
+int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
+ enum tc_setup_type type, void *type_data, bool err_stop)
+{
+ int ok_count;
+ int ret;
+
+ ret = tcf_block_cb_call(block, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count = ret;
+
+ if (!exts)
+ return ok_count;
+ ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count += ret;
+
+ return ok_count;
+}
+EXPORT_SYMBOL(tc_setup_cb_call);
static int __init tc_filter_init(void)
{
+ tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+ if (!tc_filter_wq)
+ return -ENOMEM;
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 700b345..8713513 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -35,7 +35,10 @@ struct basic_filter {
struct tcf_result res;
struct tcf_proto *tp;
struct list_head link;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -84,15 +87,26 @@ static int basic_init(struct tcf_proto *tp)
return 0;
}
-static void basic_delete_filter(struct rcu_head *head)
+static void basic_delete_filter_work(struct work_struct *work)
{
- struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+ struct basic_filter *f = container_of(work, struct basic_filter, work);
+ rtnl_lock();
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
+ rtnl_unlock();
+
kfree(f);
}
+static void basic_delete_filter(struct rcu_head *head)
+{
+ struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+ INIT_WORK(&f->work, basic_delete_filter_work);
+ tcf_queue_work(&f->work);
+}
+
static void basic_destroy(struct tcf_proto *tp)
{
struct basic_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6c6b21f..dc9bd9a 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -50,7 +50,10 @@ struct cls_bpf_prog {
struct sock_filter *bpf_ops;
const char *bpf_name;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -147,7 +150,9 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
enum tc_clsbpf_command cmd)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(prog->gen_flags);
struct tc_cls_bpf_offload cls_bpf = {};
int err;
@@ -159,17 +164,25 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
cls_bpf.exts_integrated = prog->exts_integrated;
cls_bpf.gen_flags = prog->gen_flags;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf);
- if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
- prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
+ if (addorrep) {
+ if (err < 0) {
+ cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ return err;
+ } else if (err > 0) {
+ prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+ }
+ }
- return err;
+ if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
+
+ return 0;
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct cls_bpf_prog *obj = prog;
enum tc_clsbpf_command cmd;
bool skip_sw;
@@ -179,7 +192,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
(oldprog && tc_skip_sw(oldprog->gen_flags));
if (oldprog && oldprog->offloaded) {
- if (tc_should_offload(dev, prog->gen_flags)) {
+ if (!tc_skip_hw(prog->gen_flags)) {
cmd = TC_CLSBPF_REPLACE;
} else if (!tc_skip_sw(prog->gen_flags)) {
obj = oldprog;
@@ -188,14 +201,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
return -EINVAL;
}
} else {
- if (!tc_should_offload(dev, prog->gen_flags))
+ if (tc_skip_hw(prog->gen_flags))
return skip_sw ? -EINVAL : 0;
cmd = TC_CLSBPF_ADD;
}
ret = cls_bpf_offload_cmd(tp, obj, cmd);
if (ret)
- return skip_sw ? ret : 0;
+ return ret;
obj->offloaded = true;
if (oldprog)
@@ -259,9 +272,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
kfree(prog);
}
+static void cls_bpf_delete_prog_work(struct work_struct *work)
+{
+ struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
+
+ rtnl_lock();
+ __cls_bpf_delete_prog(prog);
+ rtnl_unlock();
+}
+
static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
{
- __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
+ struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+ INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
+ tcf_queue_work(&prog->work);
}
static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
@@ -349,7 +374,7 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
}
static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
- const struct tcf_proto *tp)
+ u32 gen_flags, const struct tcf_proto *tp)
{
struct bpf_prog *fp;
char *name = NULL;
@@ -357,7 +382,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
- fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
+ if (gen_flags & TCA_CLS_FLAGS_SKIP_SW)
+ fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS,
+ qdisc_dev(tp->q));
+ else
+ fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
if (IS_ERR(fp))
return PTR_ERR(fp);
@@ -415,7 +444,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
prog->gen_flags = gen_flags;
ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
- cls_bpf_prog_from_efd(tb, prog, tp);
+ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
if (ret < 0)
return ret;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index d48452f..a97e069 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
struct tcf_exts exts;
struct tcf_ematch_tree ematches;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
+static void cls_cgroup_destroy_work(struct work_struct *work)
+{
+ struct cls_cgroup_head *head = container_of(work,
+ struct cls_cgroup_head,
+ work);
+ rtnl_lock();
+ tcf_exts_destroy(&head->exts);
+ tcf_em_tree_destroy(&head->ematches);
+ kfree(head);
+ rtnl_unlock();
+}
+
static void cls_cgroup_destroy_rcu(struct rcu_head *root)
{
struct cls_cgroup_head *head = container_of(root,
struct cls_cgroup_head,
rcu);
- tcf_exts_destroy(&head->exts);
- tcf_em_tree_destroy(&head->ematches);
- kfree(head);
+ INIT_WORK(&head->work, cls_cgroup_destroy_work);
+ tcf_queue_work(&head->work);
}
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2a3a60e..9c08fcd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -57,7 +57,10 @@ struct flow_filter {
u32 divisor;
u32 baseclass;
u32 hashrnd;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static inline u32 addr_fold(void *addr)
@@ -345,9 +348,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
-static void flow_perturbation(unsigned long arg)
+static void flow_perturbation(struct timer_list *t)
{
- struct flow_filter *f = (struct flow_filter *)arg;
+ struct flow_filter *f = from_timer(f, t, perturb_timer);
get_random_bytes(&f->hashrnd, 4);
if (f->perturb_period)
@@ -369,14 +372,24 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
-static void flow_destroy_filter(struct rcu_head *head)
+static void flow_destroy_filter_work(struct work_struct *work)
{
- struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+ struct flow_filter *f = container_of(work, struct flow_filter, work);
+ rtnl_lock();
del_timer_sync(&f->perturb_timer);
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
kfree(f);
+ rtnl_unlock();
+}
+
+static void flow_destroy_filter(struct rcu_head *head)
+{
+ struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+ INIT_WORK(&f->work, flow_destroy_filter_work);
+ tcf_queue_work(&f->work);
}
static int flow_change(struct net *net, struct sk_buff *in_skb,
@@ -491,8 +504,11 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
- if (TC_H_MAJ(baseclass) == 0)
- baseclass = TC_H_MAKE(tp->q->handle, baseclass);
+ if (TC_H_MAJ(baseclass) == 0) {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+ baseclass = TC_H_MAKE(q->handle, baseclass);
+ }
if (TC_H_MIN(baseclass) == 0)
baseclass = TC_H_MAKE(baseclass, 1);
@@ -502,8 +518,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
get_random_bytes(&fnew->hashrnd, 4);
}
- setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
- (unsigned long)fnew);
+ timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
netif_keep_dst(qdisc_dev(tp->q));
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index db831ac..c99fa9e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -87,7 +87,10 @@ struct cls_fl_filter {
struct list_head list;
u32 handle;
u32 flags;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
struct net_device *hw_dev;
};
@@ -190,27 +193,35 @@ static int fl_init(struct tcf_proto *tp)
return 0;
}
-static void fl_destroy_filter(struct rcu_head *head)
+static void fl_destroy_filter_work(struct work_struct *work)
{
- struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+ struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
+ rtnl_lock();
tcf_exts_destroy(&f->exts);
kfree(f);
+ rtnl_unlock();
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+ struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+ INIT_WORK(&f->work, fl_destroy_filter_work);
+ tcf_queue_work(&f->work);
}
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
{
struct tc_cls_flower_offload cls_flower = {};
- struct net_device *dev = f->hw_dev;
-
- if (!tc_can_offload(dev))
- return;
+ struct tcf_block *block = tp->chain->block;
tc_cls_common_offload_init(&cls_flower.common, tp);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower);
+ tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -218,22 +229,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
struct fl_flow_key *mask,
struct cls_fl_filter *f)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload cls_flower = {};
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(f->flags);
int err;
- if (!tc_can_offload(dev)) {
- if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
- (f->hw_dev && !tc_can_offload(f->hw_dev))) {
- f->hw_dev = dev;
- return tc_skip_sw(f->flags) ? -EINVAL : 0;
- }
- dev = f->hw_dev;
- cls_flower.egress_dev = true;
- } else {
- f->hw_dev = dev;
- }
-
tc_cls_common_offload_init(&cls_flower.common, tp);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
@@ -241,32 +241,36 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.mask = mask;
cls_flower.key = &f->mkey;
cls_flower.exts = &f->exts;
+ cls_flower.classid = f->res.classid;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
- &cls_flower);
- if (!err)
+ err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, skip_sw);
+ if (err < 0) {
+ fl_hw_destroy_filter(tp, f);
+ return err;
+ } else if (err > 0) {
f->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
+
+ if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- if (tc_skip_sw(f->flags))
- return err;
return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
struct tc_cls_flower_offload cls_flower = {};
- struct net_device *dev = f->hw_dev;
-
- if (!tc_can_offload(dev))
- return;
+ struct tcf_block *block = tp->chain->block;
tc_cls_common_offload_init(&cls_flower.common, tp);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.exts = &f->exts;
+ cls_flower.classid = f->res.classid;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
- &cls_flower);
+ tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
}
static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 941245a..5908f56 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -28,6 +28,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
#define HTSIZE 256
@@ -46,7 +47,10 @@ struct fw_filter {
#endif /* CONFIG_NET_CLS_IND */
struct tcf_exts exts;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static u32 fw_hash(u32 handle)
@@ -83,9 +87,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
}
}
} else {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
/* Old method: classify the packet using its skb mark. */
if (id && (TC_H_MAJ(id) == 0 ||
- !(TC_H_MAJ(id ^ tp->q->handle)))) {
+ !(TC_H_MAJ(id ^ q->handle)))) {
res->classid = id;
res->class = 0;
return 0;
@@ -119,12 +125,22 @@ static int fw_init(struct tcf_proto *tp)
return 0;
}
-static void fw_delete_filter(struct rcu_head *head)
+static void fw_delete_filter_work(struct work_struct *work)
{
- struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+ struct fw_filter *f = container_of(work, struct fw_filter, work);
+ rtnl_lock();
tcf_exts_destroy(&f->exts);
kfree(f);
+ rtnl_unlock();
+}
+
+static void fw_delete_filter(struct rcu_head *head)
+{
+ struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+
+ INIT_WORK(&f->work, fw_delete_filter_work);
+ tcf_queue_work(&f->work);
}
static void fw_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index eeac606..95dc997 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -21,7 +21,10 @@ struct cls_mall_head {
struct tcf_result res;
u32 handle;
u32 flags;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -41,21 +44,46 @@ static int mall_init(struct tcf_proto *tp)
return 0;
}
+static void mall_destroy_work(struct work_struct *work)
+{
+ struct cls_mall_head *head = container_of(work, struct cls_mall_head,
+ work);
+ rtnl_lock();
+ tcf_exts_destroy(&head->exts);
+ kfree(head);
+ rtnl_unlock();
+}
+
static void mall_destroy_rcu(struct rcu_head *rcu)
{
struct cls_mall_head *head = container_of(rcu, struct cls_mall_head,
rcu);
- tcf_exts_destroy(&head->exts);
- kfree(head);
+ INIT_WORK(&head->work, mall_destroy_work);
+ tcf_queue_work(&head->work);
+}
+
+static void mall_destroy_hw_filter(struct tcf_proto *tp,
+ struct cls_mall_head *head,
+ unsigned long cookie)
+{
+ struct tc_cls_matchall_offload cls_mall = {};
+ struct tcf_block *block = tp->chain->block;
+
+ tc_cls_common_offload_init(&cls_mall.common, tp);
+ cls_mall.command = TC_CLSMATCHALL_DESTROY;
+ cls_mall.cookie = cookie;
+
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
}
static int mall_replace_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long cookie)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_matchall_offload cls_mall = {};
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(head->flags);
int err;
tc_cls_common_offload_init(&cls_mall.common, tp);
@@ -63,37 +91,29 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL,
- &cls_mall);
- if (!err)
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
+ &cls_mall, skip_sw);
+ if (err < 0) {
+ mall_destroy_hw_filter(tp, head, cookie);
+ return err;
+ } else if (err > 0) {
head->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
- return err;
-}
-
-static void mall_destroy_hw_filter(struct tcf_proto *tp,
- struct cls_mall_head *head,
- unsigned long cookie)
-{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_matchall_offload cls_mall = {};
-
- tc_cls_common_offload_init(&cls_mall.common, tp);
- cls_mall.command = TC_CLSMATCHALL_DESTROY;
- cls_mall.cookie = cookie;
+ if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall);
+ return 0;
}
static void mall_destroy(struct tcf_proto *tp)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct net_device *dev = tp->q->dev_queue->dev;
if (!head)
return;
- if (tc_should_offload(dev, head->flags))
+ if (!tc_skip_hw(head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
call_rcu(&head->rcu, mall_destroy_rcu);
@@ -133,7 +153,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
void **arg, bool ovr)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct net_device *dev = tp->q->dev_queue->dev;
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
struct cls_mall_head *new;
u32 flags = 0;
@@ -173,14 +192,10 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto err_set_parms;
- if (tc_should_offload(dev, flags)) {
+ if (!tc_skip_hw(new->flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long) new);
- if (err) {
- if (tc_skip_sw(flags))
- goto err_replace_hw_filter;
- else
- err = 0;
- }
+ if (err)
+ goto err_replace_hw_filter;
}
if (!tc_in_hw(new->flags))
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 9ddde65..4b14ccd 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -57,7 +57,10 @@ struct route4_filter {
u32 handle;
struct route4_bucket *bkt;
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
@@ -254,12 +257,22 @@ static int route4_init(struct tcf_proto *tp)
return 0;
}
-static void route4_delete_filter(struct rcu_head *head)
+static void route4_delete_filter_work(struct work_struct *work)
{
- struct route4_filter *f = container_of(head, struct route4_filter, rcu);
+ struct route4_filter *f = container_of(work, struct route4_filter, work);
+ rtnl_lock();
tcf_exts_destroy(&f->exts);
kfree(f);
+ rtnl_unlock();
+}
+
+static void route4_delete_filter(struct rcu_head *head)
+{
+ struct route4_filter *f = container_of(head, struct route4_filter, rcu);
+
+ INIT_WORK(&f->work, route4_delete_filter_work);
+ tcf_queue_work(&f->work);
}
static void route4_destroy(struct tcf_proto *tp)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b1f6ed4..bdbc541 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -97,7 +97,10 @@ struct rsvp_filter {
u32 handle;
struct rsvp_session *sess;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
@@ -282,12 +285,22 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
-static void rsvp_delete_filter_rcu(struct rcu_head *head)
+static void rsvp_delete_filter_work(struct work_struct *work)
{
- struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
+ struct rsvp_filter *f = container_of(work, struct rsvp_filter, work);
+ rtnl_lock();
tcf_exts_destroy(&f->exts);
kfree(f);
+ rtnl_unlock();
+}
+
+static void rsvp_delete_filter_rcu(struct rcu_head *head)
+{
+ struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
+
+ INIT_WORK(&f->work, rsvp_delete_filter_work);
+ tcf_queue_work(&f->work);
}
static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 14a7e08..d6abfa6 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,6 +13,7 @@
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
/*
* Passing parameters to the root seems to be done more awkwardly than really
@@ -27,14 +28,20 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
struct tcindex_filter __rcu *next;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
};
@@ -90,9 +97,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
f = tcindex_lookup(p, key);
if (!f) {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
if (!p->fall_through)
return -1;
- res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
+ res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
res->class = 0;
pr_debug("alg 0x%x\n", res->classid);
return 0;
@@ -133,12 +142,34 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
+static void tcindex_destroy_rexts_work(struct work_struct *work)
+{
+ struct tcindex_filter_result *r;
+
+ r = container_of(work, struct tcindex_filter_result, work);
+ rtnl_lock();
+ tcf_exts_destroy(&r->exts);
+ rtnl_unlock();
+}
+
static void tcindex_destroy_rexts(struct rcu_head *head)
{
struct tcindex_filter_result *r;
r = container_of(head, struct tcindex_filter_result, rcu);
- tcf_exts_destroy(&r->exts);
+ INIT_WORK(&r->work, tcindex_destroy_rexts_work);
+ tcf_queue_work(&r->work);
+}
+
+static void tcindex_destroy_fexts_work(struct work_struct *work)
+{
+ struct tcindex_filter *f = container_of(work, struct tcindex_filter,
+ work);
+
+ rtnl_lock();
+ tcf_exts_destroy(&f->result.exts);
+ kfree(f);
+ rtnl_unlock();
}
static void tcindex_destroy_fexts(struct rcu_head *head)
@@ -146,8 +177,8 @@ static void tcindex_destroy_fexts(struct rcu_head *head)
struct tcindex_filter *f = container_of(head, struct tcindex_filter,
rcu);
- tcf_exts_destroy(&f->result.exts);
- kfree(f);
+ INIT_WORK(&f->work, tcindex_destroy_fexts_work);
+ tcf_queue_work(&f->work);
}
static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 094d224..2737b71 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -69,7 +69,10 @@ struct tc_u_knode {
u32 __percpu *pcpu_success;
#endif
struct tcf_proto *tp;
- struct rcu_head rcu;
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
/* The 'sel' field MUST be the last field in structure to allow for
* tc_u32_keys allocated at end of structure.
*/
@@ -93,7 +96,7 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
- struct Qdisc *q;
+ struct tcf_block *block;
int refcnt;
struct idr handle_idr;
struct hlist_node hnode;
@@ -335,11 +338,7 @@ static struct hlist_head *tc_u_common_hash;
static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- u32 qhandle = tp->q->handle;
- int ifindex = dev->ifindex;
-
- return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
+ return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
}
static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
@@ -349,7 +348,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
h = tc_u_hash(tp);
hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
- if (tc->q == tp->q)
+ if (tc->block == tp->chain->block)
return tc;
}
return NULL;
@@ -378,7 +377,7 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
- tp_c->q = tp->q;
+ tp_c->block = tp->chain->block;
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
@@ -422,11 +421,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
* this the u32_delete_key_rcu variant does not free the percpu
* statistics.
*/
+static void u32_delete_key_work(struct work_struct *work)
+{
+ struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+ rtnl_lock();
+ u32_destroy_key(key->tp, key, false);
+ rtnl_unlock();
+}
+
static void u32_delete_key_rcu(struct rcu_head *rcu)
{
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
- u32_destroy_key(key->tp, key, false);
+ INIT_WORK(&key->work, u32_delete_key_work);
+ tcf_queue_work(&key->work);
}
/* u32_delete_key_freepf_rcu is the rcu callback variant
@@ -436,11 +445,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu)
* for the variant that should be used with keys return from
* u32_init_knode()
*/
+static void u32_delete_key_freepf_work(struct work_struct *work)
+{
+ struct tc_u_knode *key = container_of(work, struct tc_u_knode, work);
+
+ rtnl_lock();
+ u32_destroy_key(key->tp, key, true);
+ rtnl_unlock();
+}
+
static void u32_delete_key_freepf_rcu(struct rcu_head *rcu)
{
struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu);
- u32_destroy_key(key->tp, key, true);
+ INIT_WORK(&key->work, u32_delete_key_freepf_work);
+ tcf_queue_work(&key->work);
}
static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
@@ -466,71 +485,69 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- if (!tc_should_offload(dev, 0))
- return;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
- cls_u32.command = TC_CLSU32_DELETE_KNODE;
- cls_u32.knode.handle = handle;
+ cls_u32.command = TC_CLSU32_DELETE_HNODE;
+ cls_u32.hnode.divisor = h->divisor;
+ cls_u32.hnode.handle = h->handle;
+ cls_u32.hnode.prio = h->prio;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
u32 flags)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
+ bool skip_sw = tc_skip_sw(flags);
+ bool offloaded = false;
int err;
- if (!tc_should_offload(dev, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
cls_u32.command = TC_CLSU32_NEW_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
- if (tc_skip_sw(flags))
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ if (err < 0) {
+ u32_clear_hw_hnode(tp, h);
return err;
+ } else if (err > 0) {
+ offloaded = true;
+ }
+
+ if (skip_sw && !offloaded)
+ return -EINVAL;
return 0;
}
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- if (!tc_should_offload(dev, 0))
- return;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
- cls_u32.command = TC_CLSU32_DELETE_HNODE;
- cls_u32.hnode.divisor = h->divisor;
- cls_u32.hnode.handle = h->handle;
- cls_u32.hnode.prio = h->prio;
+ cls_u32.command = TC_CLSU32_DELETE_KNODE;
+ cls_u32.knode.handle = handle;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
u32 flags)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
+ bool skip_sw = tc_skip_sw(flags);
int err;
- if (!tc_should_offload(dev, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
cls_u32.command = TC_CLSU32_REPLACE_KNODE;
cls_u32.knode.handle = n->handle;
@@ -547,13 +564,16 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
if (n->ht_down)
cls_u32.knode.link_handle = n->ht_down->handle;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
-
- if (!err)
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ if (err < 0) {
+ u32_remove_hw_knode(tp, n->handle);
+ return err;
+ } else if (err > 0) {
n->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
- if (tc_skip_sw(flags))
- return err;
+ if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
return 0;
}
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 03b677b..1331a4c 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
struct tcf_ematch_hdr *em_hdr = nla_data(nla);
int data_len = nla_len(nla) - sizeof(*em_hdr);
void *data = (void *) em_hdr + sizeof(*em_hdr);
- struct net *net = dev_net(qdisc_dev(tp->q));
+ struct net *net = tp->chain->block->net;
if (!TCF_EM_REL_VALID(em_hdr->flags))
goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa82116..b6c4f53 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -301,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
+ if (!handle)
+ return NULL;
q = qdisc_match_from_root(dev->qdisc, handle);
if (q)
goto out;
@@ -1661,9 +1663,11 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
struct tcf_bind_args *a = (void *)arg;
if (tp->ops->bind_class) {
- tcf_tree_lock(tp);
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+ sch_tree_lock(q);
tp->ops->bind_class(n, a->classid, a->cl);
- tcf_tree_unlock(tp);
+ sch_tree_unlock(q);
}
return 0;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index c5fcdf1..2dbd249 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -281,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
goto err_out;
}
- error = tcf_block_get(&flow->block, &flow->filter_list);
+ error = tcf_block_get(&flow->block, &flow->filter_list, sch);
if (error) {
kfree(flow);
goto err_out;
@@ -546,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- err = tcf_block_get(&p->link.block, &p->link.filter_list);
+ err = tcf_block_get(&p->link.block, &p->link.filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index dcef97f..6361be7 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -255,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
case TC_ACT_RECLASSIFY:
@@ -1566,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
return err;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
new file mode 100644
index 0000000..7a72980
--- /dev/null
+++ b/net/sched/sch_cbs.c
@@ -0,0 +1,373 @@
+/*
+ * net/sched/sch_cbs.c Credit Based Shaper
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+ *
+ */
+
+/* Credit Based Shaper (CBS)
+ * =========================
+ *
+ * This is a simple rate-limiting shaper aimed at TSN applications on
+ * systems with known traffic workloads.
+ *
+ * Its algorithm is defined by the IEEE 802.1Q-2014 Specification,
+ * Section 8.6.8.2, and explained in more detail in the Annex L of the
+ * same specification.
+ *
+ * There are four tunables to be considered:
+ *
+ * 'idleslope': Idleslope is the rate of credits that is
+ * accumulated (in kilobits per second) when there is at least
+ * one packet waiting for transmission. Packets are transmitted
+ * when the current value of credits is equal or greater than
+ * zero. When there is no packet to be transmitted the amount of
+ * credits is set to zero. This is the main tunable of the CBS
+ * algorithm.
+ *
+ * 'sendslope':
+ * Sendslope is the rate of credits that is depleted (it should be a
+ * negative number of kilobits per second) when a transmission is
+ * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section
+ * 8.6.8.2 item g):
+ *
+ * sendslope = idleslope - port_transmit_rate
+ *
+ * 'hicredit': Hicredit defines the maximum amount of credits (in
+ * bytes) that can be accumulated. Hicredit depends on the
+ * characteristics of interfering traffic,
+ * 'max_interference_size' is the maximum size of any burst of
+ * traffic that can delay the transmission of a frame that is
+ * available for transmission for this traffic class, (IEEE
+ * 802.1Q-2014 Annex L, Equation L-3):
+ *
+ * hicredit = max_interference_size * (idleslope / port_transmit_rate)
+ *
+ * 'locredit': Locredit is the minimum amount of credits that can
+ * be reached. It is a function of the traffic flowing through
+ * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2):
+ *
+ * locredit = max_frame_size * (sendslope / port_transmit_rate)
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+
+#define BYTES_PER_KBIT (1000LL / 8)
+
+struct cbs_sched_data {
+ bool offload;
+ int queue;
+ s64 port_rate; /* in bytes/s */
+ s64 last; /* timestamp in ns */
+ s64 credits; /* in bytes */
+ s32 locredit; /* in bytes */
+ s32 hicredit; /* in bytes */
+ s64 sendslope; /* in bytes/s */
+ s64 idleslope; /* in bytes/s */
+ struct qdisc_watchdog watchdog;
+ int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch);
+ struct sk_buff *(*dequeue)(struct Qdisc *sch);
+};
+
+static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
+{
+ return qdisc_enqueue_tail(skb, sch);
+}
+
+static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ if (sch->q.qlen == 0 && q->credits > 0) {
+ /* We need to stop accumulating credits when there's
+ * no enqueued packets and q->credits is positive.
+ */
+ q->credits = 0;
+ q->last = ktime_get_ns();
+ }
+
+ return qdisc_enqueue_tail(skb, sch);
+}
+
+static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ return q->enqueue(skb, sch);
+}
+
+/* timediff is in ns, slope is in bytes/s */
+static s64 timediff_to_credits(s64 timediff, s64 slope)
+{
+ return div64_s64(timediff * slope, NSEC_PER_SEC);
+}
+
+static s64 delay_from_credits(s64 credits, s64 slope)
+{
+ if (unlikely(slope == 0))
+ return S64_MAX;
+
+ return div64_s64(-credits * NSEC_PER_SEC, slope);
+}
+
+static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate)
+{
+ if (unlikely(port_rate == 0))
+ return S64_MAX;
+
+ return div64_s64(len * slope, port_rate);
+}
+
+static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ s64 now = ktime_get_ns();
+ struct sk_buff *skb;
+ s64 credits;
+ int len;
+
+ if (q->credits < 0) {
+ credits = timediff_to_credits(now - q->last, q->idleslope);
+
+ credits = q->credits + credits;
+ q->credits = min_t(s64, credits, q->hicredit);
+
+ if (q->credits < 0) {
+ s64 delay;
+
+ delay = delay_from_credits(q->credits, q->idleslope);
+ qdisc_watchdog_schedule_ns(&q->watchdog, now + delay);
+
+ q->last = now;
+
+ return NULL;
+ }
+ }
+
+ skb = qdisc_dequeue_head(sch);
+ if (!skb)
+ return NULL;
+
+ len = qdisc_pkt_len(skb);
+
+ /* As sendslope is a negative number, this will decrease the
+ * amount of q->credits.
+ */
+ credits = credits_from_len(len, q->sendslope, q->port_rate);
+ credits += q->credits;
+
+ q->credits = max_t(s64, credits, q->locredit);
+ q->last = now;
+
+ return skb;
+}
+
+static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
+{
+ return qdisc_dequeue_head(sch);
+}
+
+static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+
+ return q->dequeue(sch);
+}
+
+static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
+ [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
+};
+
+static void cbs_disable_offload(struct net_device *dev,
+ struct cbs_sched_data *q)
+{
+ struct tc_cbs_qopt_offload cbs = { };
+ const struct net_device_ops *ops;
+ int err;
+
+ if (!q->offload)
+ return;
+
+ q->enqueue = cbs_enqueue_soft;
+ q->dequeue = cbs_dequeue_soft;
+
+ ops = dev->netdev_ops;
+ if (!ops->ndo_setup_tc)
+ return;
+
+ cbs.queue = q->queue;
+ cbs.enable = 0;
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
+ if (err < 0)
+ pr_warn("Couldn't disable CBS offload for queue %d\n",
+ cbs.queue);
+}
+
+static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q,
+ const struct tc_cbs_qopt *opt)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct tc_cbs_qopt_offload cbs = { };
+ int err;
+
+ if (!ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ cbs.queue = q->queue;
+
+ cbs.enable = 1;
+ cbs.hicredit = opt->hicredit;
+ cbs.locredit = opt->locredit;
+ cbs.idleslope = opt->idleslope;
+ cbs.sendslope = opt->sendslope;
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs);
+ if (err < 0)
+ return err;
+
+ q->enqueue = cbs_enqueue_offload;
+ q->dequeue = cbs_dequeue_offload;
+
+ return 0;
+}
+
+static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct nlattr *tb[TCA_CBS_MAX + 1];
+ struct tc_cbs_qopt *qopt;
+ int err;
+
+ err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_CBS_PARMS])
+ return -EINVAL;
+
+ qopt = nla_data(tb[TCA_CBS_PARMS]);
+
+ if (!qopt->offload) {
+ struct ethtool_link_ksettings ecmd;
+ s64 link_speed;
+
+ if (!__ethtool_get_link_ksettings(dev, &ecmd))
+ link_speed = ecmd.base.speed;
+ else
+ link_speed = SPEED_1000;
+
+ q->port_rate = link_speed * 1000 * BYTES_PER_KBIT;
+
+ cbs_disable_offload(dev, q);
+ } else {
+ err = cbs_enable_offload(dev, q, qopt);
+ if (err < 0)
+ return err;
+ }
+
+ /* Everything went OK, save the parameters used. */
+ q->hicredit = qopt->hicredit;
+ q->locredit = qopt->locredit;
+ q->idleslope = qopt->idleslope * BYTES_PER_KBIT;
+ q->sendslope = qopt->sendslope * BYTES_PER_KBIT;
+ q->offload = qopt->offload;
+
+ return 0;
+}
+
+static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ if (!opt)
+ return -EINVAL;
+
+ q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
+
+ q->enqueue = cbs_enqueue_soft;
+ q->dequeue = cbs_dequeue_soft;
+
+ qdisc_watchdog_init(&q->watchdog, sch);
+
+ return cbs_change(sch, opt);
+}
+
+static void cbs_destroy(struct Qdisc *sch)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ qdisc_watchdog_cancel(&q->watchdog);
+
+ cbs_disable_offload(dev, q);
+}
+
+static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct cbs_sched_data *q = qdisc_priv(sch);
+ struct tc_cbs_qopt opt = { };
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ opt.hicredit = q->hicredit;
+ opt.locredit = q->locredit;
+ opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT);
+ opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT);
+ opt.offload = q->offload;
+
+ if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, nest);
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
+ .id = "cbs",
+ .priv_size = sizeof(struct cbs_sched_data),
+ .enqueue = cbs_enqueue,
+ .dequeue = cbs_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = cbs_init,
+ .reset = qdisc_reset_queue,
+ .destroy = cbs_destroy,
+ .change = cbs_change,
+ .dump = cbs_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init cbs_module_init(void)
+{
+ return register_qdisc(&cbs_qdisc_ops);
+}
+
+static void __exit cbs_module_exit(void)
+{
+ unregister_qdisc(&cbs_qdisc_ops);
+}
+module_init(cbs_module_init)
+module_exit(cbs_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 2d0e8d4..5bbcef3 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -321,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -412,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
struct drr_sched *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
err = qdisc_class_hash_init(&q->clhash);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2836c80..fb4fb71 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
- err = tcf_block_get(&p->block, &p->filter_list);
+ err = tcf_block_get(&p->block, &p->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index de3b57c..0305d79 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return 0;
}
@@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
return err;
}
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a0a19876..3839cbb 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -288,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev)
}
EXPORT_SYMBOL(dev_trans_start);
-static void dev_watchdog(unsigned long arg)
+static void dev_watchdog(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
+ struct net_device *dev = from_timer(dev, t, watchdog_timer);
netif_tx_lock(dev);
if (!qdisc_tx_is_noop(dev)) {
@@ -603,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
struct Qdisc *sch;
unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
int err = -ENOBUFS;
- struct net_device *dev = dev_queue->dev;
+ struct net_device *dev;
+ if (!dev_queue) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ dev = dev_queue->dev;
p = kzalloc_node(size, GFP_KERNEL,
netdev_queue_numa_node_read(dev_queue));
@@ -954,7 +960,7 @@ void dev_init_scheduler(struct net_device *dev)
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
+ timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}
static void shutdown_scheduler_queue(struct net_device *dev,
@@ -1018,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
}
}
EXPORT_SYMBOL(psched_ratecfg_precompute);
+
+static void mini_qdisc_rcu_func(struct rcu_head *head)
+{
+}
+
+void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
+ struct tcf_proto *tp_head)
+{
+ struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+ struct mini_Qdisc *miniq;
+
+ if (!tp_head) {
+ RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+ return;
+ }
+
+ miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
+ &miniqp->miniq1 : &miniqp->miniq2;
+
+ /* We need to make sure that readers won't see the miniq
+ * we are about to modify. So wait until previous call_rcu_bh callback
+ * is done.
+ */
+ rcu_barrier_bh();
+ miniq->filter_list = tp_head;
+ rcu_assign_pointer(*miniqp->p_miniq, miniq);
+
+ if (miniq_old)
+ /* This is counterpart of the rcu barrier above. We need to
+ * block potential new user of miniq_old until all readers
+ * are not seeing it.
+ */
+ call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
+}
+EXPORT_SYMBOL(mini_qdisc_pair_swap);
+
+void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
+ struct mini_Qdisc __rcu **p_miniq)
+{
+ miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
+ miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
+ miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->p_miniq = p_miniq;
+}
+EXPORT_SYMBOL(mini_qdisc_pair_init);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3f88b75..d04068a 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1033,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
return err;
@@ -1144,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -1405,7 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
return err;
q->eligible = RB_ROOT;
- err = tcf_block_get(&q->root.block, &q->root.filter_list);
+ err = tcf_block_get(&q->root.block, &q->root.filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c6d7ae8..fa03807 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -244,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -1030,7 +1031,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -1393,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
goto failure;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 44de4ee..5ecc38f 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -20,6 +20,8 @@
struct ingress_sched_data {
struct tcf_block *block;
+ struct tcf_block_ext_info block_info;
+ struct mini_Qdisc_pair miniqp;
};
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
@@ -53,13 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl)
return q->block;
}
+static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv)
+{
+ struct mini_Qdisc_pair *miniqp = priv;
+
+ mini_qdisc_pair_swap(miniqp, tp_head);
+}
+
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
int err;
- err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+ mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
+
+ q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ q->block_info.chain_head_change = clsact_chain_head_change;
+ q->block_info.chain_head_change_priv = &q->miniqp;
+
+ err = tcf_block_get_ext(&q->block, sch, &q->block_info);
if (err)
return err;
@@ -73,7 +88,7 @@ static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
- tcf_block_put(q->block);
+ tcf_block_put_ext(q->block, sch, &q->block_info);
net_dec_ingress_queue();
}
@@ -114,6 +129,10 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
struct clsact_sched_data {
struct tcf_block *ingress_block;
struct tcf_block *egress_block;
+ struct tcf_block_ext_info ingress_block_info;
+ struct tcf_block_ext_info egress_block_info;
+ struct mini_Qdisc_pair miniqp_ingress;
+ struct mini_Qdisc_pair miniqp_egress;
};
static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
@@ -153,13 +172,25 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
- err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+ mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress);
+
+ q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+ q->ingress_block_info.chain_head_change = clsact_chain_head_change;
+ q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress;
+
+ err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info);
if (err)
return err;
- err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+ mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress);
+
+ q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
+ q->egress_block_info.chain_head_change = clsact_chain_head_change;
+ q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
+
+ err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
if (err)
- return err;
+ goto err_egress_block_get;
net_inc_ingress_queue();
net_inc_egress_queue();
@@ -167,14 +198,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
sch->flags |= TCQ_F_CPUSTATS;
return 0;
+
+err_egress_block_get:
+ tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
+ return err;
}
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
- tcf_block_put(q->egress_block);
- tcf_block_put(q->ingress_block);
+ tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
+ tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
net_dec_ingress_queue();
net_dec_egress_queue();
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3a3e50..213b586 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
static struct netdev_queue *mq_select_queue(struct Qdisc *sch,
struct tcmsg *tcm)
{
- unsigned int ntx = TC_H_MIN(tcm->tcm_parent);
- struct netdev_queue *dev_queue = mq_queue_get(sch, ntx);
-
- if (!dev_queue) {
- struct net_device *dev = qdisc_dev(sch);
-
- return netdev_get_tx_queue(dev, 0);
- }
- return dev_queue;
+ return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6bcdfe6..b85885a9 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -18,10 +18,16 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
struct mqprio_sched {
struct Qdisc **qdiscs;
+ u16 mode;
+ u16 shaper;
int hw_offload;
+ u32 flags;
+ u64 min_rate[TC_QOPT_MAX_QUEUE];
+ u64 max_rate[TC_QOPT_MAX_QUEUE];
};
static void mqprio_destroy(struct Qdisc *sch)
@@ -39,9 +45,18 @@ static void mqprio_destroy(struct Qdisc *sch)
}
if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
- struct tc_mqprio_qopt mqprio = {};
-
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio);
+ struct tc_mqprio_qopt_offload mqprio = { { 0 } };
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ case TC_MQPRIO_MODE_CHANNEL:
+ dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ break;
+ default:
+ return;
+ }
} else {
netdev_set_num_tc(dev, 0);
}
@@ -97,6 +112,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
return 0;
}
+static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
+ [TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
+ [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
+ [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
+ [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
+};
+
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ const struct nla_policy *policy, int len)
+{
+ int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+ if (nested_len >= nla_attr_size(0))
+ return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+ nested_len, policy, NULL);
+
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+ return 0;
+}
+
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
{
struct net_device *dev = qdisc_dev(sch);
@@ -105,6 +140,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *attr;
+ int rem;
+ int len;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
@@ -115,6 +154,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (!netif_is_multiqueue(dev))
return -EOPNOTSUPP;
+ /* make certain can allocate enough classids to handle queues */
+ if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
+ return -ENOMEM;
+
if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
@@ -122,6 +165,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (mqprio_parse_opt(dev, qopt))
return -EINVAL;
+ len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
+ if (len > 0) {
+ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+ sizeof(*qopt));
+ if (err < 0)
+ return err;
+
+ if (!qopt->hw)
+ return -EINVAL;
+
+ if (tb[TCA_MQPRIO_MODE]) {
+ priv->flags |= TC_MQPRIO_F_MODE;
+ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+ }
+
+ if (tb[TCA_MQPRIO_SHAPER]) {
+ priv->flags |= TC_MQPRIO_F_SHAPER;
+ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+ }
+
+ if (tb[TCA_MQPRIO_MIN_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->min_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MIN_RATE;
+ }
+
+ if (tb[TCA_MQPRIO_MAX_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->max_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MAX_RATE;
+ }
+ }
+
/* pre-allocate qdisc, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
@@ -146,14 +242,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_mqprio_qopt mqprio = *qopt;
+ struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
+ return -EINVAL;
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ mqprio.flags = priv->flags;
+ if (priv->flags & TC_MQPRIO_F_MODE)
+ mqprio.mode = priv->mode;
+ if (priv->flags & TC_MQPRIO_F_SHAPER)
+ mqprio.shaper = priv->shaper;
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.min_rate[i] = priv->min_rate[i];
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.max_rate[i] = priv->max_rate[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+ err = dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_MQPRIO,
&mqprio);
if (err)
return err;
- priv->hw_offload = mqprio.hw;
+ priv->hw_offload = mqprio.qopt.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -193,7 +311,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
unsigned long cl)
{
struct net_device *dev = qdisc_dev(sch);
- unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+ unsigned long ntx = cl - 1;
if (ntx >= dev->num_tx_queues)
return NULL;
@@ -223,11 +341,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
return 0;
}
+static int dump_rates(struct mqprio_sched *priv,
+ struct tc_mqprio_qopt *opt, struct sk_buff *skb)
+{
+ struct nlattr *nest;
+ int i;
+
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
+ nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < opt->num_tc; i++) {
+ if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
+ sizeof(priv->min_rate[i]),
+ &priv->min_rate[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
+ nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < opt->num_tc; i++) {
+ if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
+ sizeof(priv->max_rate[i]),
+ &priv->max_rate[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
- unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
unsigned int i;
@@ -258,12 +416,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.offset[i] = dev->tc_to_txq[i].offset;
}
- if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
+ goto nla_put_failure;
+
+ if ((priv->flags & TC_MQPRIO_F_MODE) &&
+ nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
goto nla_put_failure;
- return skb->len;
+ if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
+ nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
+ goto nla_put_failure;
+
+ if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
+ priv->flags & TC_MQPRIO_F_MAX_RATE) &&
+ (dump_rates(priv, &opt, skb) != 0))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, nla);
nla_put_failure:
- nlmsg_trim(skb, b);
+ nlmsg_trim(skb, nla);
return -1;
}
@@ -282,38 +453,35 @@ static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
struct net_device *dev = qdisc_dev(sch);
unsigned int ntx = TC_H_MIN(classid);
- if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
- return 0;
- return ntx;
+ /* There are essentially two regions here that have valid classid
+ * values. The first region will have a classid value of 1 through
+ * num_tx_queues. All of these are backed by actual Qdiscs.
+ */
+ if (ntx < TC_H_MIN_PRIORITY)
+ return (ntx <= dev->num_tx_queues) ? ntx : 0;
+
+ /* The second region represents the hardware traffic classes. These
+ * are represented by classid values of TC_H_MIN_PRIORITY through
+ * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
+ */
+ return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
}
static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
- struct net_device *dev = qdisc_dev(sch);
+ if (cl < TC_H_MIN_PRIORITY) {
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+ struct net_device *dev = qdisc_dev(sch);
+ int tc = netdev_txq_to_tc(dev, cl - 1);
- if (cl <= netdev_get_num_tc(dev)) {
+ tcm->tcm_parent = (tc < 0) ? 0 :
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(tc + TC_H_MIN_PRIORITY));
+ tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ } else {
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_info = 0;
- } else {
- int i;
- struct netdev_queue *dev_queue;
-
- dev_queue = mqprio_queue_get(sch, cl);
- tcm->tcm_parent = 0;
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- struct netdev_tc_txq tc = dev->tc_to_txq[i];
- int q_idx = cl - netdev_get_num_tc(dev);
-
- if (q_idx > tc.offset &&
- q_idx <= tc.offset + tc.count) {
- tcm->tcm_parent =
- TC_H_MAKE(TC_H_MAJ(sch->handle),
- TC_H_MIN(i + 1));
- break;
- }
- }
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
}
tcm->tcm_handle |= TC_H_MIN(cl);
return 0;
@@ -324,15 +492,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
__releases(d->lock)
__acquires(d->lock)
{
- struct net_device *dev = qdisc_dev(sch);
-
- if (cl <= netdev_get_num_tc(dev)) {
+ if (cl >= TC_H_MIN_PRIORITY) {
int i;
__u32 qlen = 0;
struct Qdisc *qdisc;
struct gnet_stats_queue qstats = {0};
struct gnet_stats_basic_packed bstats = {0};
- struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
/* Drop lock here it will be reclaimed before touching
* statistics this is required because the d->lock we
@@ -385,17 +552,36 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
/* Walk hierarchy with a virtual class per tc */
arg->count = arg->skip;
- for (ntx = arg->skip;
- ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
- ntx++) {
+ for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
+ if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+
+ /* Pad the values and skip over unused traffic classes */
+ if (ntx < TC_MAX_QUEUE) {
+ arg->count = TC_MAX_QUEUE;
+ ntx = TC_MAX_QUEUE;
+ }
+
+ /* Reset offset, sort out remaining per-queue qdiscs */
+ for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
if (arg->fn(sch, ntx + 1, arg) < 0) {
arg->stop = 1;
- break;
+ return;
}
arg->count++;
}
}
+static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch,
+ struct tcmsg *tcm)
+{
+ return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
+}
+
static const struct Qdisc_class_ops mqprio_class_ops = {
.graft = mqprio_graft,
.leaf = mqprio_leaf,
@@ -403,6 +589,7 @@ static const struct Qdisc_class_ops mqprio_class_ops = {
.walk = mqprio_walk,
.dump = mqprio_dump_class,
.dump_stats = mqprio_dump_class_stats,
+ .select_queue = mqprio_select_queue,
};
static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index ff4fc3e..0122163 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5a4f100..db0228a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -148,12 +148,6 @@ struct netem_skb_cb {
psched_time_t time_to_send;
};
-
-static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
-{
- return rb_entry(rb, struct sk_buff, rbnode);
-}
-
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
/* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -364,7 +358,7 @@ static void tfifo_reset(struct Qdisc *sch)
struct rb_node *p = rb_first(&q->t_root);
while (p) {
- struct sk_buff *skb = netem_rb_to_skb(p);
+ struct sk_buff *skb = rb_to_skb(p);
p = rb_next(p);
rb_erase(&skb->rbnode, &q->t_root);
@@ -382,7 +376,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
struct sk_buff *skb;
parent = *p;
- skb = netem_rb_to_skb(parent);
+ skb = rb_to_skb(parent);
if (tnext >= netem_skb_cb(skb)->time_to_send)
p = &parent->rb_right;
else
@@ -538,7 +532,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff *t_skb;
struct netem_skb_cb *t_last;
- t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+ t_skb = skb_rb_last(&q->t_root);
t_last = netem_skb_cb(t_skb);
if (!last ||
t_last->time_to_send > last->time_to_send) {
@@ -617,7 +611,7 @@ deliver:
if (p) {
psched_time_t time_to_send;
- skb = netem_rb_to_skb(p);
+ skb = rb_to_skb(p);
/* if more time remaining? */
time_to_send = netem_skb_cb(skb)->time_to_send;
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 6c2791d..776c694 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -74,6 +74,7 @@ struct pie_sched_data {
struct pie_vars vars;
struct pie_stats stats;
struct timer_list adapt_timer;
+ struct Qdisc *sch;
};
static void pie_params_init(struct pie_params *params)
@@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch)
pie_vars_init(&q->vars);
}
-static void pie_timer(unsigned long arg)
+static void pie_timer(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct pie_sched_data *q = qdisc_priv(sch);
+ struct pie_sched_data *q = from_timer(q, t, adapt_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
pie_vars_init(&q->vars);
sch->limit = q->params.limit;
- setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch);
+ q->sch = sch;
+ timer_setup(&q->adapt_timer, pie_timer, 0);
if (opt) {
int err = pie_change(sch, opt);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2dd6c68..2c79559 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6ddfd49..6962b37 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -709,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -1419,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
int i, j, err;
u32 max_cl_shift, maxbudg_shift, max_classes;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 93b9d70..007dd8e 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>
@@ -40,6 +41,7 @@ struct red_sched_data {
u32 limit; /* HARD maximal queue length */
unsigned char flags;
struct timer_list adapt_timer;
+ struct Qdisc *sch;
struct red_parms parms;
struct red_vars vars;
struct red_stats stats;
@@ -147,11 +149,37 @@ static void red_reset(struct Qdisc *sch)
red_restart(&q->vars);
}
+static int red_offload(struct Qdisc *sch, bool enable)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload opt = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ };
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ if (enable) {
+ opt.command = TC_RED_REPLACE;
+ opt.set.min = q->parms.qth_min >> q->parms.Wlog;
+ opt.set.max = q->parms.qth_max >> q->parms.Wlog;
+ opt.set.probability = q->parms.max_P;
+ opt.set.is_ecn = red_use_ecn(q);
+ } else {
+ opt.command = TC_RED_DESTROY;
+ }
+
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
+}
+
static void red_destroy(struct Qdisc *sch)
{
struct red_sched_data *q = qdisc_priv(sch);
del_timer_sync(&q->adapt_timer);
+ red_offload(sch, false);
qdisc_destroy(q->qdisc);
}
@@ -218,13 +246,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
red_start_of_idle_period(&q->vars);
sch_tree_unlock(sch);
+ red_offload(sch, true);
return 0;
}
-static inline void red_adaptative_timer(unsigned long arg)
+static inline void red_adaptative_timer(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct red_sched_data *q = qdisc_priv(sch);
+ struct red_sched_data *q = from_timer(q, t, adapt_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -238,10 +267,38 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
struct red_sched_data *q = qdisc_priv(sch);
q->qdisc = &noop_qdisc;
- setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
+ q->sch = sch;
+ timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
return red_change(sch, opt);
}
+static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_red_qopt_offload hw_stats = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ .command = TC_RED_STATS,
+ .stats.bstats = &sch->bstats,
+ .stats.qstats = &sch->qstats,
+ };
+ int err;
+
+ opt->flags &= ~TC_RED_OFFLOADED;
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return 0;
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
+ &hw_stats);
+ if (err == -EOPNOTSUPP)
+ return 0;
+
+ if (!err)
+ opt->flags |= TC_RED_OFFLOADED;
+
+ return err;
+}
+
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct red_sched_data *q = qdisc_priv(sch);
@@ -255,8 +312,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
.Plog = q->parms.Plog,
.Scell_log = q->parms.Scell_log,
};
+ int err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
+ err = red_dump_offload(sch, &opt);
+ if (err)
+ goto nla_put_failure;
+
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
@@ -273,6 +335,7 @@ nla_put_failure:
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
struct tc_red_xstats st = {
.early = q->stats.prob_drop + q->stats.forced_drop,
.pdrop = q->stats.pdrop,
@@ -280,6 +343,24 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.marked = q->stats.prob_mark + q->stats.forced_mark,
};
+ if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) {
+ struct red_stats hw_stats = {0};
+ struct tc_red_qopt_offload hw_stats_request = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ .command = TC_RED_XSTATS,
+ .xstats = &hw_stats,
+ };
+ if (!dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_QDISC_RED,
+ &hw_stats_request)) {
+ st.early += hw_stats.prob_drop + hw_stats.forced_drop;
+ st.pdrop += hw_stats.pdrop;
+ st.other += hw_stats.other;
+ st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
+ }
+ }
+
return gnet_stats_copy_app(d, &st, sizeof(st));
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index cc39e17..0678deb 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return false;
}
@@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
struct sfb_sched_data *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 74ea863..890f4a4 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -145,6 +145,7 @@ struct sfq_sched_data {
int perturb_period;
unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
struct timer_list perturb_timer;
+ struct Qdisc *sch;
};
/*
@@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return 0;
}
@@ -604,10 +606,10 @@ drop:
qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
-static void sfq_perturbation(unsigned long arg)
+static void sfq_perturbation(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -722,10 +724,9 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
int i;
int err;
- setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
- (unsigned long)sch);
+ timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index bf90c53..1ca84a2 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for SCTP support code.
#
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dfb9651..69394f4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -149,8 +149,7 @@ static struct sctp_association *sctp_association_init(
/* Initializes the timers */
for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
- setup_timer(&asoc->timers[i], sctp_timer_events[i],
- (unsigned long)asoc);
+ timer_setup(&asoc->timers[i], sctp_timer_events[i], 0);
/* Pull default initialization values from the sock options.
* Note: This assumes that the values have already been
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 92a0714..621b5ca 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
{
struct dst_entry *dst;
- if (!t)
+ if (sock_owned_by_user(sk) || !t)
return;
dst = sctp_transport_dst_check(t);
if (dst)
@@ -794,7 +794,7 @@ hit:
struct sctp_hash_cmp_arg {
const union sctp_addr *paddr;
const struct net *net;
- u16 lport;
+ __be16 lport;
};
static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -820,37 +820,37 @@ out:
return err;
}
-static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
+static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
{
const struct sctp_transport *t = data;
const union sctp_addr *paddr = &t->ipaddr;
const struct net *net = sock_net(t->asoc->base.sk);
- u16 lport = htons(t->asoc->base.bind_addr.port);
- u32 addr;
+ __be16 lport = htons(t->asoc->base.bind_addr.port);
+ __u32 addr;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
- addr = paddr->v4.sin_addr.s_addr;
+ addr = (__force __u32)paddr->v4.sin_addr.s_addr;
- return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+ return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
(__force __u32)lport, net_hash_mix(net), seed);
}
-static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
+static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
{
const struct sctp_hash_cmp_arg *x = data;
const union sctp_addr *paddr = x->paddr;
const struct net *net = x->net;
- u16 lport = x->lport;
- u32 addr;
+ __be16 lport = x->lport;
+ __u32 addr;
if (paddr->sa.sa_family == AF_INET6)
addr = jhash(&paddr->v6.sin6_addr, 16, seed);
else
- addr = paddr->v4.sin_addr.s_addr;
+ addr = (__force __u32)paddr->v4.sin_addr.s_addr;
- return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 |
+ return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
(__force __u32)lport, net_hash_mix(net), seed);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 51c4887..a6dfa86 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -738,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
/* Was this packet marked by Explicit Congestion Notification? */
static int sctp_v6_is_ce(const struct sk_buff *skb)
{
- return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
+ return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20);
}
/* Dump the v6 addr to the seq file. */
@@ -882,8 +882,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
net = sock_net(&opt->inet.sk);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
- if (!dev ||
- !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
+ if (!dev || !(opt->inet.freebind ||
+ net->ipv6.sysctl.ip_nonlocal_bind ||
+ ipv6_chk_addr(net, &addr->v6.sin6_addr,
+ dev, 0))) {
rcu_read_unlock();
return 0;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fcd80fe..f5172c2 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -622,9 +622,9 @@ static void sctp_v4_ecn_capable(struct sock *sk)
INET_ECN_xmit(sk);
}
-static void sctp_addr_wq_timeout_handler(unsigned long arg)
+static void sctp_addr_wq_timeout_handler(struct timer_list *t)
{
- struct net *net = (struct net *)arg;
+ struct net *net = from_timer(net, t, sctp.addr_wq_timer);
struct sctp_sockaddr_entry *addrw, *temp;
struct sctp_sock *sp;
@@ -1304,8 +1304,7 @@ static int __net_init sctp_defaults_init(struct net *net)
INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
spin_lock_init(&net->sctp.addr_wq_lock);
net->sctp.addr_wq_timer.expires = 0;
- setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler,
- (unsigned long)net);
+ timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
return 0;
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 22ed01a..a72a7d9 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -463,6 +463,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
.r = r,
.net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
};
+ int pos = cb->args[2];
/* eps hashtable dumps
* args:
@@ -493,7 +494,8 @@ skip:
goto done;
sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
- net, (int *)&cb->args[2], &commp);
+ net, &pos, &commp);
+ cb->args[2] = pos;
done:
cb->args[1] = cb->args[4];
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index ca8f196..514465b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2854,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
addr_param_len = af->to_addr_param(addr, &addr_param);
param.param_hdr.type = flags;
param.param_hdr.length = htons(paramlen + addr_param_len);
- param.crr_id = i;
+ param.crr_id = htonl(i);
sctp_addto_chunk(retval, paramlen, &param);
sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -2867,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc,
addr_param_len = af->to_addr_param(addr, &addr_param);
param.param_hdr.type = SCTP_PARAM_DEL_IP;
param.param_hdr.length = htons(paramlen + addr_param_len);
- param.crr_id = i;
+ param.crr_id = htonl(i);
sctp_addto_chunk(retval, paramlen, &param);
sctp_addto_chunk(retval, addr_param_len, &addr_param);
@@ -3591,7 +3591,7 @@ static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc,
*/
struct sctp_chunk *sctp_make_strreset_req(
const struct sctp_association *asoc,
- __u16 stream_num, __u16 *stream_list,
+ __u16 stream_num, __be16 *stream_list,
bool out, bool in)
{
struct sctp_strreset_outreq outreq;
@@ -3788,7 +3788,8 @@ bool sctp_verify_reconf(const struct sctp_association *asoc,
{
struct sctp_reconf_chunk *hdr;
union sctp_params param;
- __u16 last = 0, cnt = 0;
+ __be16 last = 0;
+ __u16 cnt = 0;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
sctp_walk_params(param, hdr, params) {
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 402bfbb..df94d77 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -243,9 +243,10 @@ nomem:
/* When the T3-RTX timer expires, it calls this function to create the
* relevant state machine event.
*/
-void sctp_generate_t3_rtx_event(unsigned long peer)
+void sctp_generate_t3_rtx_event(struct timer_list *t)
{
- struct sctp_transport *transport = (struct sctp_transport *) peer;
+ struct sctp_transport *transport =
+ from_timer(transport, t, T3_rtx_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -319,50 +320,63 @@ out_unlock:
sctp_association_put(asoc);
}
-static void sctp_generate_t1_cookie_event(unsigned long data)
+static void sctp_generate_t1_cookie_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE);
}
-static void sctp_generate_t1_init_event(unsigned long data)
+static void sctp_generate_t1_init_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT);
}
-static void sctp_generate_t2_shutdown_event(unsigned long data)
+static void sctp_generate_t2_shutdown_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN);
}
-static void sctp_generate_t4_rto_event(unsigned long data)
+static void sctp_generate_t4_rto_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO);
}
-static void sctp_generate_t5_shutdown_guard_event(unsigned long data)
+static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *)data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t,
+ timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]);
+
sctp_generate_timeout_event(asoc,
SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD);
} /* sctp_generate_t5_shutdown_guard_event() */
-static void sctp_generate_autoclose_event(unsigned long data)
+static void sctp_generate_autoclose_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *) data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE);
}
/* Generate a heart beat event. If the sock is busy, reschedule. Make
* sure that the transport is still valid.
*/
-void sctp_generate_heartbeat_event(unsigned long data)
+void sctp_generate_heartbeat_event(struct timer_list *t)
{
- struct sctp_transport *transport = (struct sctp_transport *) data;
+ struct sctp_transport *transport = from_timer(transport, t, hb_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -405,9 +419,10 @@ out_unlock:
/* Handle the timeout of the ICMP protocol unreachable timer. Trigger
* the correct state machine transition that will close the association.
*/
-void sctp_generate_proto_unreach_event(unsigned long data)
+void sctp_generate_proto_unreach_event(struct timer_list *t)
{
- struct sctp_transport *transport = (struct sctp_transport *)data;
+ struct sctp_transport *transport =
+ from_timer(transport, t, proto_unreach_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -439,9 +454,10 @@ out_unlock:
}
/* Handle the timeout of the RE-CONFIG timer. */
-void sctp_generate_reconf_event(unsigned long data)
+void sctp_generate_reconf_event(struct timer_list *t)
{
- struct sctp_transport *transport = (struct sctp_transport *)data;
+ struct sctp_transport *transport =
+ from_timer(transport, t, reconf_timer);
struct sctp_association *asoc = transport->asoc;
struct sock *sk = asoc->base.sk;
struct net *net = sock_net(sk);
@@ -471,24 +487,27 @@ out_unlock:
}
/* Inject a SACK Timeout event into the state machine. */
-static void sctp_generate_sack_event(unsigned long data)
+static void sctp_generate_sack_event(struct timer_list *t)
{
- struct sctp_association *asoc = (struct sctp_association *)data;
+ struct sctp_association *asoc =
+ from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]);
+
sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK);
}
sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
- NULL,
- sctp_generate_t1_cookie_event,
- sctp_generate_t1_init_event,
- sctp_generate_t2_shutdown_event,
- NULL,
- sctp_generate_t4_rto_event,
- sctp_generate_t5_shutdown_guard_event,
- NULL,
- NULL,
- sctp_generate_sack_event,
- sctp_generate_autoclose_event,
+ [SCTP_EVENT_TIMEOUT_NONE] = NULL,
+ [SCTP_EVENT_TIMEOUT_T1_COOKIE] = sctp_generate_t1_cookie_event,
+ [SCTP_EVENT_TIMEOUT_T1_INIT] = sctp_generate_t1_init_event,
+ [SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = sctp_generate_t2_shutdown_event,
+ [SCTP_EVENT_TIMEOUT_T3_RTX] = NULL,
+ [SCTP_EVENT_TIMEOUT_T4_RTO] = sctp_generate_t4_rto_event,
+ [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] =
+ sctp_generate_t5_shutdown_guard_event,
+ [SCTP_EVENT_TIMEOUT_HEARTBEAT] = NULL,
+ [SCTP_EVENT_TIMEOUT_RECONF] = NULL,
+ [SCTP_EVENT_TIMEOUT_SACK] = sctp_generate_sack_event,
+ [SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sctp_generate_autoclose_event,
};
@@ -1610,12 +1629,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
break;
case SCTP_CMD_INIT_FAILED:
- sctp_cmd_init_failed(commands, asoc, cmd->obj.err);
+ sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
break;
case SCTP_CMD_ASSOC_FAILED:
sctp_cmd_assoc_failed(commands, asoc, event_type,
- subtype, chunk, cmd->obj.err);
+ subtype, chunk, cmd->obj.u32);
break;
case SCTP_CMD_INIT_COUNTER_INC:
@@ -1683,8 +1702,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
case SCTP_CMD_PROCESS_CTSN:
/* Dummy up a SACK for processing. */
sackh.cum_tsn_ack = cmd->obj.be32;
- sackh.a_rwnd = asoc->peer.rwnd +
- asoc->outqueue.outstanding_bytes;
+ sackh.a_rwnd = htonl(asoc->peer.rwnd +
+ asoc->outqueue.outstanding_bytes);
sackh.num_gap_ack_blocks = 0;
sackh.num_dup_tsns = 0;
chunk->subh.sack_hdr = &sackh;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 88c2842..b029757 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -171,6 +171,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
sk_mem_charge(sk, chunk->skb->truesize);
}
+static void sctp_clear_owner_w(struct sctp_chunk *chunk)
+{
+ skb_orphan(chunk->skb);
+}
+
+static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
+ void (*cb)(struct sctp_chunk *))
+
+{
+ struct sctp_outq *q = &asoc->outqueue;
+ struct sctp_transport *t;
+ struct sctp_chunk *chunk;
+
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
+ list_for_each_entry(chunk, &t->transmitted, transmitted_list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->retransmit, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->sacked, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->abandoned, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->out_chunk_list, list)
+ cb(chunk);
+}
+
/* Verify that this is a valid address. */
static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
int len)
@@ -4978,6 +5008,10 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
struct socket *sock;
int err = 0;
+ /* Do not peel off from one netns to another one. */
+ if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
+ return -EINVAL;
+
if (!asoc)
return -EINVAL;
@@ -8375,7 +8409,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* paths won't try to lock it and then oldsk.
*/
lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
+ sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
sctp_assoc_migrate(assoc, newsk);
+ sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
/* If the association on the newsk is already closed before accept()
* is called, set RCV_SHUTDOWN flag.
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 5ea33a2..b8c8cab 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -261,6 +261,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
__u16 i, str_nums, *str_list;
struct sctp_chunk *chunk;
int retval = -EINVAL;
+ __be16 *nstr_list;
bool out, in;
if (!asoc->peer.reconf_capable ||
@@ -291,13 +292,18 @@ int sctp_send_reset_streams(struct sctp_association *asoc,
if (str_list[i] >= stream->incnt)
goto out;
+ nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL);
+ if (!nstr_list) {
+ retval = -ENOMEM;
+ goto out;
+ }
+
for (i = 0; i < str_nums; i++)
- str_list[i] = htons(str_list[i]);
+ nstr_list[i] = htons(str_list[i]);
- chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in);
+ chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in);
- for (i = 0; i < str_nums; i++)
- str_list[i] = ntohs(str_list[i]);
+ kfree(nstr_list);
if (!chunk) {
retval = -ENOMEM;
@@ -387,7 +393,7 @@ int sctp_send_add_streams(struct sctp_association *asoc,
{
struct sctp_stream *stream = &asoc->stream;
struct sctp_chunk *chunk = NULL;
- int retval = -ENOMEM;
+ int retval;
__u32 outcnt, incnt;
__u16 out, in;
@@ -419,8 +425,10 @@ int sctp_send_add_streams(struct sctp_association *asoc,
}
chunk = sctp_make_strreset_addstrm(asoc, out, in);
- if (!chunk)
+ if (!chunk) {
+ retval = -ENOMEM;
goto out;
+ }
asoc->strreset_chunk = chunk;
sctp_chunk_hold(asoc->strreset_chunk);
@@ -442,7 +450,7 @@ out:
}
static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param(
- struct sctp_association *asoc, __u32 resp_seq,
+ struct sctp_association *asoc, __be32 resp_seq,
__be16 type)
{
struct sctp_chunk *chunk = asoc->strreset_chunk;
@@ -482,8 +490,9 @@ struct sctp_chunk *sctp_process_strreset_outreq(
{
struct sctp_strreset_outreq *outreq = param.v;
struct sctp_stream *stream = &asoc->stream;
- __u16 i, nums, flags = 0, *str_p = NULL;
__u32 result = SCTP_STRRESET_DENIED;
+ __u16 i, nums, flags = 0;
+ __be16 *str_p = NULL;
__u32 request_seq;
request_seq = ntohl(outreq->request_seq);
@@ -576,8 +585,9 @@ struct sctp_chunk *sctp_process_strreset_inreq(
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
struct sctp_chunk *chunk = NULL;
- __u16 i, nums, *str_p;
__u32 request_seq;
+ __u16 i, nums;
+ __be16 *str_p;
request_seq = ntohl(inreq->request_seq);
if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -897,7 +907,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
struct sctp_strreset_outreq *outreq;
- __u16 *str_p;
+ __be16 *str_p;
outreq = (struct sctp_strreset_outreq *)req;
str_p = outreq->list_of_streams;
@@ -922,7 +932,7 @@ struct sctp_chunk *sctp_process_strreset_resp(
nums, str_p, GFP_ATOMIC);
} else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
struct sctp_strreset_inreq *inreq;
- __u16 *str_p;
+ __be16 *str_p;
/* if the result is performed, it's impossible for inreq */
if (result == SCTP_STRRESET_PERFORMED)
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index 03513a9..0b83ec5 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -124,7 +124,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = {
extern struct sctp_sched_ops sctp_sched_prio;
extern struct sctp_sched_ops sctp_sched_rr;
-struct sctp_sched_ops *sctp_sched_ops[] = {
+static struct sctp_sched_ops *sctp_sched_ops[] = {
&sctp_sched_fcfs,
&sctp_sched_prio,
&sctp_sched_rr,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 2d9bd37..1e5a224 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -87,14 +87,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
INIT_LIST_HEAD(&peer->send_ready);
INIT_LIST_HEAD(&peer->transports);
- setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
- (unsigned long)peer);
- setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
- (unsigned long)peer);
- setup_timer(&peer->reconf_timer, sctp_generate_reconf_event,
- (unsigned long)peer);
- setup_timer(&peer->proto_unreach_timer,
- sctp_generate_proto_unreach_event, (unsigned long)peer);
+ timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
+ timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
+ timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
+ timer_setup(&peer->proto_unreach_timer,
+ sctp_generate_proto_unreach_event, 0);
/* Initialize the 64-bit random nonce sent with heartbeat. */
get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 67abc01..5447228 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -847,7 +847,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event(
struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event(
const struct sctp_association *asoc, __u16 flags, __u16 stream_num,
- __u16 *stream_list, gfp_t gfp)
+ __be16 *stream_list, gfp_t gfp)
{
struct sctp_stream_reset_event *sreset;
struct sctp_ulpevent *event;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 745f145..6451c50 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -390,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
+ /* peer has not signalled SMC-capability */
+ smc->use_fallback = true;
+ goto out_connected;
+ }
+
/* IPSec connections opt out of SMC-R optimizations */
if (using_ipsec(smc)) {
reason_code = SMC_CLC_DECL_IPSEC;
@@ -555,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
}
smc_copy_sock_settings_to_clc(smc);
+ tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_connect(smc->clcsock, addr, alen, flags);
if (rc)
goto out;
@@ -759,6 +766,12 @@ static void smc_listen_work(struct work_struct *work)
u8 prefix_len;
u8 ibport;
+ /* check if peer is smc capable */
+ if (!tcp_sk(newclcsock->sk)->syn_smc) {
+ new_smc->use_fallback = true;
+ goto out_connected;
+ }
+
/* do inband token exchange -
*wait for and receive SMC Proposal CLC message
*/
@@ -808,7 +821,7 @@ static void smc_listen_work(struct work_struct *work)
rc = local_contact;
if (rc == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_buf_create(new_smc);
if (rc) {
reason_code = SMC_CLC_DECL_MEM;
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
smc_close_init(new_smc);
@@ -831,7 +844,7 @@ static void smc_listen_work(struct work_struct *work)
buf_desc->mr_rx[SMC_SINGLE_LINK]);
if (rc) {
reason_code = SMC_CLC_DECL_INTERR;
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
}
}
@@ -839,15 +852,15 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_clc_send_accept(new_smc, local_contact);
if (rc)
- goto out_err;
+ goto out_err_unlock;
/* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
SMC_CLC_CONFIRM);
if (reason_code < 0)
- goto out_err;
+ goto out_err_unlock;
if (reason_code > 0)
- goto decline_rdma;
+ goto decline_rdma_unlock;
smc_conn_save_peer_info(new_smc, &cclc);
if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, &cclc);
@@ -855,34 +868,34 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
if (rc) {
reason_code = SMC_CLC_DECL_INTERR;
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link);
if (rc) {
reason_code = SMC_CLC_DECL_INTERR;
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
/* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc);
if (reason_code < 0) {
/* peer is not aware of a problem */
rc = reason_code;
- goto out_err;
+ goto out_err_unlock;
}
if (reason_code > 0)
- goto decline_rdma;
+ goto decline_rdma_unlock;
}
smc_tx_init(new_smc);
+ mutex_unlock(&smc_create_lgr_pending);
out_connected:
sk_refcnt_debug_inc(newsmcsk);
if (newsmcsk->sk_state == SMC_INIT)
newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
- mutex_unlock(&smc_create_lgr_pending);
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
smc_accept_enqueue(&lsmc->sk, newsmcsk);
@@ -896,6 +909,8 @@ enqueue:
sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
return;
+decline_rdma_unlock:
+ mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
smc_conn_free(&new_smc->conn);
@@ -907,6 +922,8 @@ decline_rdma:
}
goto out_connected;
+out_err_unlock:
+ mutex_unlock(&smc_create_lgr_pending);
out_err:
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
@@ -963,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog)
* them to the clc socket -- copy smc socket options to clc socket
*/
smc_copy_sock_settings_to_clc(smc);
+ tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_listen(smc->clcsock, backlog);
if (rc)
@@ -1405,6 +1423,7 @@ static int __init smc_init(void)
goto out_sock;
}
+ static_branch_enable(&tcp_have_smc);
return 0;
out_sock:
@@ -1429,6 +1448,7 @@ static void __exit smc_exit(void)
list_del_init(&lgr->list);
smc_lgr_free(lgr); /* free link group */
}
+ static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client();
sock_unregister(PF_SMC);
proto_unregister(&smc_proto);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 0ccd6fa..0bee9d1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 5ef97e5a..87f7bed 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 56f883d..149ceda 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index b7dd274..1800e16 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 1c55414..12a9af1 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index f0d16fb..48615d2 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -360,7 +361,8 @@ static void smc_close_passive_work(struct work_struct *work)
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
sk->sk_state = SMC_PEERCLOSEWAIT2;
- /* fall through to check for closing */
+ /* fall through */
+ /* to check for closing */
case SMC_PEERCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
if (!smc_cdc_rxed_any_close(&smc->conn))
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 4a3d99a..ed82506 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 20b66e7..2578fbd 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -380,10 +381,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
&gattr))
continue;
- if (gattr.ndev &&
- (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) {
- lnk->gid = gid;
- return 0;
+ if (gattr.ndev) {
+ if (is_vlan_dev(gattr.ndev) &&
+ vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
+ lnk->gid = gid;
+ dev_put(gattr.ndev);
+ return 0;
+ }
+ dev_put(gattr.ndev);
}
}
return -ENODEV;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 19c44bf..fe691bf 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 0b58522..90f1a7f 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
@@ -369,26 +370,17 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
- struct net_device *ndev;
+ struct ib_gid_attr gattr;
int rc;
rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
- &smcibdev->gid[ibport - 1], NULL);
- /* the SMC protocol requires specification of the roce MAC address;
- * if net_device cannot be determined, it can be derived from gid 0
- */
- ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
- if (ndev) {
- memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
- dev_put(ndev);
- } else if (!rc) {
- memcpy(&smcibdev->mac[ibport - 1][0],
- &smcibdev->gid[ibport - 1].raw[8], 3);
- memcpy(&smcibdev->mac[ibport - 1][3],
- &smcibdev->gid[ibport - 1].raw[13], 3);
- smcibdev->mac[ibport - 1][0] &= ~0x02;
- }
- return rc;
+ &smcibdev->gid[ibport - 1], &gattr);
+ if (rc || !gattr.ndev)
+ return -ENODEV;
+
+ memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gattr.ndev);
+ return 0;
}
/* Create an identifier unique for this instance of SMC-R.
@@ -419,6 +411,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
&smcibdev->pattr[ibport - 1]);
if (rc)
goto out;
+ /* the SMC protocol requires specification of the RoCE MAC address */
rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
if (rc)
goto out;
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 9b927a3..e90630d 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c2f9165..92fe4cc 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index b472f85..51b27ce 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 31f8453..74568cd 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index c4f1bcc..5a29519 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 3e631ae..cbf5863 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
index b5b80e1..3a32b59 100644
--- a/net/smc/smc_rx.h
+++ b/net/smc/smc_rx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index ec49bc3..c48dc2d 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 1d6a0dc..7825596 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 525d91e..de4537f 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 45eb538..2acf12b 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index d4ea46a..c5fda15 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -49,7 +49,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
{
/* Unrecoverable error in receive */
- del_timer(&strp->msg_timer);
+ cancel_delayed_work(&strp->msg_timer_work);
if (strp->stopped)
return;
@@ -68,7 +68,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
static void strp_start_timer(struct strparser *strp, long timeo)
{
if (timeo)
- mod_timer(&strp->msg_timer, timeo);
+ mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo);
}
/* Lower lock held */
@@ -319,7 +319,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
eaten += (cand_len - extra);
/* Hurray, we have a new message! */
- del_timer(&strp->msg_timer);
+ cancel_delayed_work(&strp->msg_timer_work);
strp->skb_head = NULL;
STRP_STATS_INCR(strp->stats.msgs);
@@ -450,9 +450,10 @@ static void strp_work(struct work_struct *w)
do_strp_work(container_of(w, struct strparser, work));
}
-static void strp_msg_timeout(unsigned long arg)
+static void strp_msg_timeout(struct work_struct *w)
{
- struct strparser *strp = (struct strparser *)arg;
+ struct strparser *strp = container_of(w, struct strparser,
+ msg_timer_work.work);
/* Message assembly timed out */
STRP_STATS_INCR(strp->stats.msg_timeouts);
@@ -505,9 +506,7 @@ int strp_init(struct strparser *strp, struct sock *sk,
strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp;
- setup_timer(&strp->msg_timer, strp_msg_timeout,
- (unsigned long)strp);
-
+ INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout);
INIT_WORK(&strp->work, strp_work);
return 0;
@@ -532,7 +531,7 @@ void strp_done(struct strparser *strp)
{
WARN_ON(!strp->stopped);
- del_timer_sync(&strp->msg_timer);
+ cancel_delayed_work_sync(&strp->msg_timer_work);
cancel_work_sync(&strp->work);
if (strp->skb_head) {
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index ea7ffa1..090658c 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Linux kernel SUN RPC
#
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 14e9e53..c374268 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Linux kernel rpcsec_gss implementation
#
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 5f3d527..75d72e1 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/net/sunrpc/auth_null.c
*
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 82337e1..dafd6b8 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/net/sunrpc/auth_unix.c
*
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index c8fd0b6..e980d2a4 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/**
* debugfs interface for sunrpc
*
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 394ce52..7ec10b9 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __SUNRPC_NETNS_H__
#define __SUNRPC_NETNS_H__
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 0cc8383..5dea47e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -44,7 +44,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
-static void __rpc_queue_timer_fn(unsigned long ptr);
+static void __rpc_queue_timer_fn(struct timer_list *t);
/*
* RPC tasks sit here while waiting for conditions to improve.
@@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
- setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
+ timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@@ -635,9 +635,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
-static void __rpc_queue_timer_fn(unsigned long ptr)
+static void __rpc_queue_timer_fn(struct timer_list *t)
{
- struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr;
+ struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
struct rpc_task *task, *n;
unsigned long expires, now, timeo;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa04666..33f4ae6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
serv->sv_xdrsize = xdrsize;
INIT_LIST_HEAD(&serv->sv_tempsocks);
INIT_LIST_HEAD(&serv->sv_permsocks);
- init_timer(&serv->sv_temptimer);
+ timer_setup(&serv->sv_temptimer, NULL, 0);
spin_lock_init(&serv->sv_lock);
__svc_init_bc(serv);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d16a8b4..71de77b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
-static void svc_age_temp_xprts(unsigned long closure);
+static void svc_age_temp_xprts(struct timer_list *t);
static void svc_delete_xprt(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
@@ -785,8 +785,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
serv->sv_tmpcnt++;
if (serv->sv_temptimer.function == NULL) {
/* setup timer to age temp transports */
- setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
- (unsigned long)serv);
+ serv->sv_temptimer.function = (TIMER_FUNC_TYPE)svc_age_temp_xprts;
mod_timer(&serv->sv_temptimer,
jiffies + svc_conn_age_period * HZ);
}
@@ -960,9 +959,9 @@ out:
* Timer function to close old temporary transports, using
* a mark-and-sweep algorithm.
*/
-static void svc_age_temp_xprts(unsigned long closure)
+static void svc_age_temp_xprts(struct timer_list *t)
{
- struct svc_serv *serv = (struct svc_serv *)closure;
+ struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
struct svc_xprt *xprt;
struct list_head *le, *next;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e741ec2..6160d17 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
}
static void
-xprt_init_autodisconnect(unsigned long data)
+xprt_init_autodisconnect(struct timer_list *t)
{
- struct rpc_xprt *xprt = (struct rpc_xprt *)data;
+ struct rpc_xprt *xprt = from_timer(xprt, t, timer);
spin_lock(&xprt->transport_lock);
if (!list_empty(&xprt->recv))
@@ -1333,7 +1333,7 @@ void xprt_release(struct rpc_task *task)
rpc_count_iostats(task, task->tk_client->cl_metrics);
spin_lock(&xprt->recv_lock);
if (!list_empty(&req->rq_list)) {
- list_del(&req->rq_list);
+ list_del_init(&req->rq_list);
xprt_wait_on_pinned_rqst(req);
}
spin_unlock(&xprt->recv_lock);
@@ -1422,10 +1422,9 @@ found:
xprt->idle_timeout = 0;
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
- setup_timer(&xprt->timer, xprt_init_autodisconnect,
- (unsigned long)xprt);
+ timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
else
- init_timer(&xprt->timer);
+ timer_setup(&xprt->timer, NULL, 0);
if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
xprt_destroy(xprt);
@@ -1445,6 +1444,23 @@ out:
return xprt;
}
+static void xprt_destroy_cb(struct work_struct *work)
+{
+ struct rpc_xprt *xprt =
+ container_of(work, struct rpc_xprt, task_cleanup);
+
+ rpc_xprt_debugfs_unregister(xprt);
+ rpc_destroy_wait_queue(&xprt->binding);
+ rpc_destroy_wait_queue(&xprt->pending);
+ rpc_destroy_wait_queue(&xprt->sending);
+ rpc_destroy_wait_queue(&xprt->backlog);
+ kfree(xprt->servername);
+ /*
+ * Tear down transport state and free the rpc_xprt
+ */
+ xprt->ops->destroy(xprt);
+}
+
/**
* xprt_destroy - destroy an RPC transport, killing off all requests.
* @xprt: transport to destroy
@@ -1454,22 +1470,19 @@ static void xprt_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: destroying transport %p\n", xprt);
- /* Exclude transport connect/disconnect handlers */
+ /*
+ * Exclude transport connect/disconnect handlers and autoclose
+ */
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
del_timer_sync(&xprt->timer);
- rpc_xprt_debugfs_unregister(xprt);
- rpc_destroy_wait_queue(&xprt->binding);
- rpc_destroy_wait_queue(&xprt->pending);
- rpc_destroy_wait_queue(&xprt->sending);
- rpc_destroy_wait_queue(&xprt->backlog);
- cancel_work_sync(&xprt->task_cleanup);
- kfree(xprt->servername);
/*
- * Tear down transport state and free the rpc_xprt
+ * Destroy sockets etc from the system workqueue so they can
+ * safely flush receive work running on rpciod.
*/
- xprt->ops->destroy(xprt);
+ INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb);
+ schedule_work(&xprt->task_cleanup);
}
static void xprt_destroy_kref(struct kref *kref)
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index ae92a9e..e2d64c7 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Multipath support for RPC
*
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index b8213dd..8bf19e1 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index d31d0ac..823a781 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
*
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 6c71513..fa759dd 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 5a936a6..35d7517 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
@@ -401,7 +402,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (unlikely(n != mw->mw_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n",
+ dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
__func__, frmr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index ec37ad8..992594b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015 Oracle. All rights reserved.
*
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 7dcda45..9bd0454 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016 Oracle. All rights reserved.
*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9b5de31..4dad5da 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/net/sunrpc/xprtsock.c
*
@@ -2203,7 +2204,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
- struct socket *sock = transport->sock;
+ struct socket *sock;
int status = -EIO;
sock = xs_create_sock(xprt, transport,
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 31b9f9c..37bb0bf 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux TIPC layer
#
@@ -8,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- server.o socket.o
+ server.o socket.o group.o
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 7d99029..329325bd 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct sk_buff_head xmitq;
int rc = 0;
- __skb_queue_head_init(&xmitq);
+ skb_queue_head_init(&xmitq);
tipc_bcast_lock(net);
if (tipc_link_bc_peers(l))
rc = tipc_link_xmit(l, pkts, &xmitq);
@@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_nlist *dests, u16 *cong_link_cnt)
{
+ struct tipc_dest *dst, *tmp;
struct sk_buff_head _pkts;
- struct u32_item *n, *tmp;
- u32 dst, selector;
+ u32 dnode, selector;
selector = msg_link_selector(buf_msg(skb_peek(pkts)));
- __skb_queue_head_init(&_pkts);
+ skb_queue_head_init(&_pkts);
- list_for_each_entry_safe(n, tmp, &dests->list, list) {
- dst = n->value;
- if (!tipc_msg_pskb_copy(dst, pkts, &_pkts))
+ list_for_each_entry_safe(dst, tmp, &dests->list, list) {
+ dnode = dst->node;
+ if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts))
return -ENOMEM;
/* Any other return value than -ELINKCONG is ignored */
- if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG)
+ if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG)
(*cong_link_cnt)++;
}
return 0;
@@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node)
{
if (node == nl->self)
nl->local = true;
- else if (u32_push(&nl->list, node))
+ else if (tipc_dest_push(&nl->list, node, 0))
nl->remote++;
}
@@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node)
{
if (node == nl->self)
nl->local = false;
- else if (u32_del(&nl->list, node))
+ else if (tipc_dest_del(&nl->list, node, 0))
nl->remote--;
}
void tipc_nlist_purge(struct tipc_nlist *nl)
{
- u32_list_purge(&nl->list);
+ tipc_dest_list_purge(&nl->list);
nl->remote = 0;
nl->local = 0;
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5cc5398..9643426 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
+static inline struct tipc_server *tipc_topsrv(struct net *net)
+{
+ return tipc_net(net)->topsrv;
+}
+
static inline unsigned int tipc_hashfn(u32 addr)
{
return addr & (NODE_HTABLE_SIZE - 1);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 02462d6..92e4828 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req)
*
* Called whenever a link setup request timer associated with a bearer expires.
*/
-static void disc_timeout(unsigned long data)
+static void disc_timeout(struct timer_list *t)
{
- struct tipc_link_req *req = (struct tipc_link_req *)data;
+ struct tipc_link_req *req = from_timer(req, t, timer);
struct sk_buff *skb;
int max_delay;
@@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
req->num_nodes = 0;
req->timer_intv = TIPC_LINK_REQ_INIT;
spin_lock_init(&req->lock);
- setup_timer(&req->timer, disc_timeout, (unsigned long)req);
+ timer_setup(&req->timer, disc_timeout, 0);
mod_timer(&req->timer, jiffies + req->timer_intv);
b->link_req = req;
*skb = skb_clone(req->buf, GFP_ATOMIC);
diff --git a/net/tipc/group.c b/net/tipc/group.c
new file mode 100644
index 0000000..7821085
--- /dev/null
+++ b/net/tipc/group.c
@@ -0,0 +1,871 @@
+/*
+ * net/tipc/group.c: TIPC group messaging code
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "group.h"
+#include "bcast.h"
+#include "server.h"
+#include "msg.h"
+#include "socket.h"
+#include "node.h"
+#include "name_table.h"
+#include "subscr.h"
+
+#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
+#define ADV_IDLE ADV_UNIT
+#define ADV_ACTIVE (ADV_UNIT * 12)
+
+enum mbr_state {
+ MBR_QUARANTINED,
+ MBR_DISCOVERED,
+ MBR_JOINING,
+ MBR_PUBLISHED,
+ MBR_JOINED,
+ MBR_PENDING,
+ MBR_ACTIVE,
+ MBR_RECLAIMING,
+ MBR_REMITTED,
+ MBR_LEAVING
+};
+
+struct tipc_member {
+ struct rb_node tree_node;
+ struct list_head list;
+ struct list_head congested;
+ struct sk_buff *event_msg;
+ struct sk_buff_head deferredq;
+ struct tipc_group *group;
+ u32 node;
+ u32 port;
+ u32 instance;
+ enum mbr_state state;
+ u16 advertised;
+ u16 window;
+ u16 bc_rcv_nxt;
+ u16 bc_syncpt;
+ u16 bc_acked;
+ bool usr_pending;
+};
+
+struct tipc_group {
+ struct rb_root members;
+ struct list_head congested;
+ struct list_head pending;
+ struct list_head active;
+ struct list_head reclaiming;
+ struct tipc_nlist dests;
+ struct net *net;
+ int subid;
+ u32 type;
+ u32 instance;
+ u32 domain;
+ u32 scope;
+ u32 portid;
+ u16 member_cnt;
+ u16 active_cnt;
+ u16 max_active;
+ u16 bc_snd_nxt;
+ u16 bc_ackers;
+ bool loopback;
+ bool events;
+};
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq);
+
+static void tipc_group_decr_active(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ grp->active_cnt--;
+}
+
+static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
+{
+ int max_active, active_pool, idle_pool;
+ int mcnt = grp->member_cnt + 1;
+
+ /* Limit simultaneous reception from other members */
+ max_active = min(mcnt / 8, 64);
+ max_active = max(max_active, 16);
+ grp->max_active = max_active;
+
+ /* Reserve blocks for active and idle members */
+ active_pool = max_active * ADV_ACTIVE;
+ idle_pool = (mcnt - max_active) * ADV_IDLE;
+
+ /* Scale to bytes, considering worst-case truesize/msgsize ratio */
+ return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
+}
+
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
+{
+ return grp->bc_snd_nxt;
+}
+
+static bool tipc_group_is_enabled(struct tipc_member *m)
+{
+ return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
+}
+
+static bool tipc_group_is_receiver(struct tipc_member *m)
+{
+ return m && m->state >= MBR_JOINED;
+}
+
+u32 tipc_group_exclude(struct tipc_group *grp)
+{
+ if (!grp->loopback)
+ return grp->portid;
+ return 0;
+}
+
+int tipc_group_size(struct tipc_group *grp)
+{
+ return grp->member_cnt;
+}
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+ struct tipc_group_req *mreq)
+{
+ struct tipc_group *grp;
+ u32 type = mreq->type;
+
+ grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
+ if (!grp)
+ return NULL;
+ tipc_nlist_init(&grp->dests, tipc_own_addr(net));
+ INIT_LIST_HEAD(&grp->congested);
+ INIT_LIST_HEAD(&grp->active);
+ INIT_LIST_HEAD(&grp->pending);
+ INIT_LIST_HEAD(&grp->reclaiming);
+ grp->members = RB_ROOT;
+ grp->net = net;
+ grp->portid = portid;
+ grp->domain = addr_domain(net, mreq->scope);
+ grp->type = type;
+ grp->instance = mreq->instance;
+ grp->scope = mreq->scope;
+ grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
+ grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
+ if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
+ return grp;
+ kfree(grp);
+ return NULL;
+}
+
+void tipc_group_delete(struct net *net, struct tipc_group *grp)
+{
+ struct rb_root *tree = &grp->members;
+ struct tipc_member *m, *tmp;
+ struct sk_buff_head xmitq;
+
+ __skb_queue_head_init(&xmitq);
+
+ rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+ tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+ list_del(&m->list);
+ kfree(m);
+ }
+ tipc_node_distr_xmit(net, &xmitq);
+ tipc_nlist_purge(&grp->dests);
+ tipc_topsrv_kern_unsubscr(net, grp->subid);
+ kfree(grp);
+}
+
+struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
+ u32 node, u32 port)
+{
+ struct rb_node *n = grp->members.rb_node;
+ u64 nkey, key = (u64)node << 32 | port;
+ struct tipc_member *m;
+
+ while (n) {
+ m = container_of(n, struct tipc_member, tree_node);
+ nkey = (u64)m->node << 32 | m->port;
+ if (key < nkey)
+ n = n->rb_left;
+ else if (key > nkey)
+ n = n->rb_right;
+ else
+ return m;
+ }
+ return NULL;
+}
+
+static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
+ u32 node, u32 port)
+{
+ struct tipc_member *m;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (m && tipc_group_is_enabled(m))
+ return m;
+ return NULL;
+}
+
+static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
+ u32 node)
+{
+ struct tipc_member *m;
+ struct rb_node *n;
+
+ for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+ m = container_of(n, struct tipc_member, tree_node);
+ if (m->node == node)
+ return m;
+ }
+ return NULL;
+}
+
+static void tipc_group_add_to_tree(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ u64 nkey, key = (u64)m->node << 32 | m->port;
+ struct rb_node **n, *parent = NULL;
+ struct tipc_member *tmp;
+
+ n = &grp->members.rb_node;
+ while (*n) {
+ tmp = container_of(*n, struct tipc_member, tree_node);
+ parent = *n;
+ tmp = container_of(parent, struct tipc_member, tree_node);
+ nkey = (u64)tmp->node << 32 | tmp->port;
+ if (key < nkey)
+ n = &(*n)->rb_left;
+ else if (key > nkey)
+ n = &(*n)->rb_right;
+ else
+ return;
+ }
+ rb_link_node(&m->tree_node, parent, n);
+ rb_insert_color(&m->tree_node, &grp->members);
+}
+
+static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
+ u32 node, u32 port,
+ int state)
+{
+ struct tipc_member *m;
+
+ m = kzalloc(sizeof(*m), GFP_ATOMIC);
+ if (!m)
+ return NULL;
+ INIT_LIST_HEAD(&m->list);
+ INIT_LIST_HEAD(&m->congested);
+ __skb_queue_head_init(&m->deferredq);
+ m->group = grp;
+ m->node = node;
+ m->port = port;
+ m->bc_acked = grp->bc_snd_nxt - 1;
+ grp->member_cnt++;
+ tipc_group_add_to_tree(grp, m);
+ tipc_nlist_add(&grp->dests, m->node);
+ m->state = state;
+ return m;
+}
+
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
+{
+ tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
+}
+
+static void tipc_group_delete_member(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ rb_erase(&m->tree_node, &grp->members);
+ grp->member_cnt--;
+
+ /* Check if we were waiting for replicast ack from this member */
+ if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
+ grp->bc_ackers--;
+
+ list_del_init(&m->list);
+ list_del_init(&m->congested);
+ tipc_group_decr_active(grp, m);
+
+ /* If last member on a node, remove node from dest list */
+ if (!tipc_group_find_node(grp, m->node))
+ tipc_nlist_del(&grp->dests, m->node);
+
+ kfree(m);
+}
+
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
+{
+ return &grp->dests;
+}
+
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+ int *scope)
+{
+ seq->type = grp->type;
+ seq->lower = grp->instance;
+ seq->upper = grp->instance;
+ *scope = grp->scope;
+}
+
+void tipc_group_update_member(struct tipc_member *m, int len)
+{
+ struct tipc_group *grp = m->group;
+ struct tipc_member *_m, *tmp;
+
+ if (!tipc_group_is_enabled(m))
+ return;
+
+ m->window -= len;
+
+ if (m->window >= ADV_IDLE)
+ return;
+
+ if (!list_empty(&m->congested))
+ return;
+
+ /* Sort member into congested members' list */
+ list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
+ if (m->window > _m->window)
+ continue;
+ list_add_tail(&m->congested, &_m->congested);
+ return;
+ }
+ list_add_tail(&m->congested, &grp->congested);
+}
+
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
+{
+ u16 prev = grp->bc_snd_nxt - 1;
+ struct tipc_member *m;
+ struct rb_node *n;
+
+ for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+ m = container_of(n, struct tipc_member, tree_node);
+ if (tipc_group_is_enabled(m)) {
+ tipc_group_update_member(m, len);
+ m->bc_acked = prev;
+ }
+ }
+
+ /* Mark number of acknowledges to expect, if any */
+ if (ack)
+ grp->bc_ackers = grp->member_cnt;
+ grp->bc_snd_nxt++;
+}
+
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **mbr)
+{
+ struct sk_buff_head xmitq;
+ struct tipc_member *m;
+ int adv, state;
+
+ m = tipc_group_find_dest(grp, dnode, dport);
+ *mbr = m;
+ if (!m)
+ return false;
+ if (m->usr_pending)
+ return true;
+ if (m->window >= len)
+ return false;
+ m->usr_pending = true;
+
+ /* If not fully advertised, do it now to prevent mutual blocking */
+ adv = m->advertised;
+ state = m->state;
+ if (state < MBR_JOINED)
+ return true;
+ if (state == MBR_JOINED && adv == ADV_IDLE)
+ return true;
+ if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
+ return true;
+ if (state == MBR_PENDING && adv == ADV_IDLE)
+ return true;
+ skb_queue_head_init(&xmitq);
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
+ tipc_node_distr_xmit(grp->net, &xmitq);
+ return true;
+}
+
+bool tipc_group_bc_cong(struct tipc_group *grp, int len)
+{
+ struct tipc_member *m = NULL;
+
+ /* If prev bcast was replicast, reject until all receivers have acked */
+ if (grp->bc_ackers)
+ return true;
+
+ if (list_empty(&grp->congested))
+ return false;
+
+ m = list_first_entry(&grp->congested, struct tipc_member, congested);
+ if (m->window >= len)
+ return false;
+
+ return tipc_group_cong(grp, m->node, m->port, len, &m);
+}
+
+/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number
+ */
+static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
+{
+ struct tipc_msg *_hdr, *hdr = buf_msg(skb);
+ u16 bc_seqno = msg_grp_bc_seqno(hdr);
+ struct sk_buff *_skb, *tmp;
+ int mtyp = msg_type(hdr);
+
+ /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
+ if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
+ skb_queue_walk_safe(defq, _skb, tmp) {
+ _hdr = buf_msg(_skb);
+ if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
+ continue;
+ __skb_queue_before(defq, _skb, skb);
+ return;
+ }
+ /* Bcast was not bypassed, - add to tail */
+ }
+ /* Unicasts are never bypassed, - always add to tail */
+ __skb_queue_tail(defq, skb);
+}
+
+/* tipc_group_filter_msg() - determine if we should accept arriving message
+ */
+void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ bool ack, deliver, update, leave = false;
+ struct sk_buff_head *defq;
+ struct tipc_member *m;
+ struct tipc_msg *hdr;
+ u32 node, port;
+ int mtyp, blks;
+
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ port = msg_origport(hdr);
+
+ if (!msg_in_group(hdr))
+ goto drop;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!tipc_group_is_receiver(m))
+ goto drop;
+
+ if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ goto drop;
+
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ defq = &m->deferredq;
+ tipc_group_sort_msg(skb, defq);
+
+ while ((skb = skb_peek(defq))) {
+ hdr = buf_msg(skb);
+ mtyp = msg_type(hdr);
+ deliver = true;
+ ack = false;
+ update = false;
+
+ if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ break;
+
+ /* Decide what to do with message */
+ switch (mtyp) {
+ case TIPC_GRP_MCAST_MSG:
+ if (msg_nameinst(hdr) != grp->instance) {
+ update = true;
+ deliver = false;
+ }
+ /* Fall thru */
+ case TIPC_GRP_BCAST_MSG:
+ m->bc_rcv_nxt++;
+ ack = msg_grp_bc_ack_req(hdr);
+ break;
+ case TIPC_GRP_UCAST_MSG:
+ break;
+ case TIPC_GRP_MEMBER_EVT:
+ if (m->state == MBR_LEAVING)
+ leave = true;
+ if (!grp->events)
+ deliver = false;
+ break;
+ default:
+ break;
+ }
+
+ /* Execute decisions */
+ __skb_dequeue(defq);
+ if (deliver)
+ __skb_queue_tail(inputq, skb);
+ else
+ kfree_skb(skb);
+
+ if (ack)
+ tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
+
+ if (leave) {
+ tipc_group_delete_member(grp, m);
+ __skb_queue_purge(defq);
+ break;
+ }
+ if (!update)
+ continue;
+
+ blks = msg_blocks(hdr);
+ tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
+ }
+ return;
+drop:
+ kfree_skb(skb);
+}
+
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq)
+{
+ struct list_head *active = &grp->active;
+ int max_active = grp->max_active;
+ int reclaim_limit = max_active * 3 / 4;
+ int active_cnt = grp->active_cnt;
+ struct tipc_member *m, *rm;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!m)
+ return;
+
+ m->advertised -= blks;
+
+ switch (m->state) {
+ case MBR_JOINED:
+ /* Reclaim advertised space from least active member */
+ if (!list_empty(active) && active_cnt >= reclaim_limit) {
+ rm = list_first_entry(active, struct tipc_member, list);
+ rm->state = MBR_RECLAIMING;
+ list_move_tail(&rm->list, &grp->reclaiming);
+ tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
+ }
+ /* If max active, become pending and wait for reclaimed space */
+ if (active_cnt >= max_active) {
+ m->state = MBR_PENDING;
+ list_add_tail(&m->list, &grp->pending);
+ break;
+ }
+ /* Otherwise become active */
+ m->state = MBR_ACTIVE;
+ list_add_tail(&m->list, &grp->active);
+ grp->active_cnt++;
+ /* Fall through */
+ case MBR_ACTIVE:
+ if (!list_is_last(&m->list, &grp->active))
+ list_move_tail(&m->list, &grp->active);
+ if (m->advertised > (ADV_ACTIVE * 3 / 4))
+ break;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ break;
+ case MBR_REMITTED:
+ if (m->advertised > ADV_IDLE)
+ break;
+ m->state = MBR_JOINED;
+ if (m->advertised < ADV_IDLE) {
+ pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ }
+ break;
+ case MBR_RECLAIMING:
+ case MBR_DISCOVERED:
+ case MBR_JOINING:
+ case MBR_LEAVING:
+ default:
+ break;
+ }
+}
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ int adv = 0;
+
+ skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
+ m->node, tipc_own_addr(grp->net),
+ m->port, grp->portid, 0);
+ if (!skb)
+ return;
+
+ if (m->state == MBR_ACTIVE)
+ adv = ADV_ACTIVE - m->advertised;
+ else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
+ adv = ADV_IDLE - m->advertised;
+
+ hdr = buf_msg(skb);
+
+ if (mtyp == GRP_JOIN_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_LEAVE_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ } else if (mtyp == GRP_ADV_MSG) {
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_ACK_MSG) {
+ msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
+ } else if (mtyp == GRP_REMIT_MSG) {
+ msg_set_grp_remitted(hdr, m->window);
+ }
+ __skb_queue_tail(xmitq, skb);
+}
+
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
+ struct tipc_msg *hdr, struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ u32 node = msg_orignode(hdr);
+ u32 port = msg_origport(hdr);
+ struct tipc_member *m, *pm;
+ struct tipc_msg *ehdr;
+ u16 remitted, in_flight;
+
+ if (!grp)
+ return;
+
+ m = tipc_group_find_member(grp, node, port);
+
+ switch (msg_type(hdr)) {
+ case GRP_JOIN_MSG:
+ if (!m)
+ m = tipc_group_create_member(grp, node, port,
+ MBR_QUARANTINED);
+ if (!m)
+ return;
+ m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+ m->bc_rcv_nxt = m->bc_syncpt;
+ m->window += msg_adv_win(hdr);
+
+ /* Wait until PUBLISH event is received */
+ if (m->state == MBR_DISCOVERED) {
+ m->state = MBR_JOINING;
+ } else if (m->state == MBR_PUBLISHED) {
+ m->state = MBR_JOINED;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ ehdr = buf_msg(m->event_msg);
+ msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, m->event_msg);
+ }
+ if (m->window < ADV_IDLE)
+ tipc_group_update_member(m, 0);
+ else
+ list_del_init(&m->congested);
+ return;
+ case GRP_LEAVE_MSG:
+ if (!m)
+ return;
+ m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+
+ /* Wait until WITHDRAW event is received */
+ if (m->state != MBR_LEAVING) {
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ return;
+ }
+ /* Otherwise deliver already received WITHDRAW event */
+ ehdr = buf_msg(m->event_msg);
+ msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, m->event_msg);
+ *usr_wakeup = true;
+ list_del_init(&m->congested);
+ return;
+ case GRP_ADV_MSG:
+ if (!m)
+ return;
+ m->window += msg_adv_win(hdr);
+ *usr_wakeup = m->usr_pending;
+ m->usr_pending = false;
+ list_del_init(&m->congested);
+ return;
+ case GRP_ACK_MSG:
+ if (!m)
+ return;
+ m->bc_acked = msg_grp_bc_acked(hdr);
+ if (--grp->bc_ackers)
+ break;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ return;
+ case GRP_RECLAIM_MSG:
+ if (!m)
+ return;
+ *usr_wakeup = m->usr_pending;
+ m->usr_pending = false;
+ tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
+ m->window = ADV_IDLE;
+ return;
+ case GRP_REMIT_MSG:
+ if (!m || m->state != MBR_RECLAIMING)
+ return;
+
+ list_del_init(&m->list);
+ grp->active_cnt--;
+ remitted = msg_grp_remitted(hdr);
+
+ /* Messages preceding the REMIT still in receive queue */
+ if (m->advertised > remitted) {
+ m->state = MBR_REMITTED;
+ in_flight = m->advertised - remitted;
+ }
+ /* All messages preceding the REMIT have been read */
+ if (m->advertised <= remitted) {
+ m->state = MBR_JOINED;
+ in_flight = 0;
+ }
+ /* ..and the REMIT overtaken by more messages => re-advertise */
+ if (m->advertised < remitted)
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+
+ m->advertised = ADV_IDLE + in_flight;
+
+ /* Set oldest pending member to active and advertise */
+ if (list_empty(&grp->pending))
+ return;
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+ return;
+ default:
+ pr_warn("Received unknown GROUP_PROTO message\n");
+ }
+}
+
+/* tipc_group_member_evt() - receive and handle a member up/down event
+ */
+void tipc_group_member_evt(struct tipc_group *grp,
+ bool *usr_wakeup,
+ int *sk_rcvbuf,
+ struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_event *evt = (void *)msg_data(hdr);
+ u32 instance = evt->found_lower;
+ u32 node = evt->port.node;
+ u32 port = evt->port.ref;
+ int event = evt->event;
+ struct tipc_member *m;
+ struct net *net;
+ bool node_up;
+ u32 self;
+
+ if (!grp)
+ goto drop;
+
+ net = grp->net;
+ self = tipc_own_addr(net);
+ if (!grp->loopback && node == self && port == grp->portid)
+ goto drop;
+
+ /* Convert message before delivery to user */
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
+ msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
+ msg_set_origport(hdr, port);
+ msg_set_orignode(hdr, node);
+ msg_set_nametype(hdr, grp->type);
+ msg_set_grp_evt(hdr, event);
+
+ m = tipc_group_find_member(grp, node, port);
+
+ if (event == TIPC_PUBLISHED) {
+ if (!m)
+ m = tipc_group_create_member(grp, node, port,
+ MBR_DISCOVERED);
+ if (!m)
+ goto drop;
+
+ /* Hold back event if JOIN message not yet received */
+ if (m->state == MBR_DISCOVERED) {
+ m->event_msg = skb;
+ m->state = MBR_PUBLISHED;
+ } else {
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, skb);
+ m->state = MBR_JOINED;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ }
+ m->instance = instance;
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ if (m->window < ADV_IDLE)
+ tipc_group_update_member(m, 0);
+ else
+ list_del_init(&m->congested);
+ } else if (event == TIPC_WITHDRAWN) {
+ if (!m)
+ goto drop;
+
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ node_up = tipc_node_is_up(net, node);
+
+ /* Hold back event if more messages might be expected */
+ if (m->state != MBR_LEAVING && node_up) {
+ m->event_msg = skb;
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ } else {
+ if (node_up)
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ else
+ msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+ __skb_queue_tail(inputq, skb);
+ }
+ list_del_init(&m->congested);
+ }
+ *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
+ return;
+drop:
+ kfree_skb(skb);
+}
diff --git a/net/tipc/group.h b/net/tipc/group.h
new file mode 100644
index 0000000..d525e1c
--- /dev/null
+++ b/net/tipc/group.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/group.h: Include file for TIPC group unicast/multicast functions
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_GROUP_H
+#define _TIPC_GROUP_H
+
+#include "core.h"
+
+struct tipc_group;
+struct tipc_member;
+struct tipc_msg;
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+ struct tipc_group_req *mreq);
+void tipc_group_delete(struct net *net, struct tipc_group *grp);
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+ int *scope);
+u32 tipc_group_exclude(struct tipc_group *grp);
+void tipc_group_filter_msg(struct tipc_group *grp,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
+ int *sk_rcvbuf, struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
+ struct tipc_msg *hdr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **m);
+bool tipc_group_bc_cong(struct tipc_group *grp, int len);
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq);
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
+void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_size(struct tipc_group *grp);
+#endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ac0144f..870b9b8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1039,6 +1039,7 @@ int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *inputq)
{
+ struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq;
struct tipc_msg *hdr = buf_msg(skb);
switch (msg_user(hdr)) {
@@ -1046,13 +1047,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
case TIPC_MEDIUM_IMPORTANCE:
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
- if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) {
- skb_queue_tail(l->bc_rcvlink->inputq, skb);
+ if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
+ skb_queue_tail(mc_inputq, skb);
return true;
}
case CONN_MANAGER:
skb_queue_tail(inputq, skb);
return true;
+ case GROUP_PROTOCOL:
+ skb_queue_tail(mc_inputq, skb);
+ return true;
case NAME_DISTRIBUTOR:
l->bc_rcvlink->state = LINK_ESTABLISHED;
skb_queue_tail(l->namedq, skb);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 9e109bb..8e884ed 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen,
u16 gen = mon->dom_gen;
u16 len;
- if (!tipc_mon_is_active(net, mon))
+ /* Send invalid record if not active */
+ if (!tipc_mon_is_active(net, mon)) {
+ dom->len = 0;
return;
+ }
/* Send only a dummy record with ack if peer has acked our last sent */
if (likely(state->acked_gen == gen)) {
@@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr,
struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
struct tipc_peer *peer;
+ if (!tipc_mon_is_active(net, mon)) {
+ state->probing = false;
+ state->monitoring = true;
+ return;
+ }
+
/* Used cached state if table has not changed */
if (!state->probing &&
(state->list_gen == mon->list_gen) &&
@@ -578,9 +587,9 @@ void tipc_mon_get_state(struct net *net, u32 addr,
read_unlock_bh(&mon->lock);
}
-static void mon_timeout(unsigned long m)
+static void mon_timeout(struct timer_list *t)
{
- struct tipc_monitor *mon = (void *)m;
+ struct tipc_monitor *mon = from_timer(mon, t, timer);
struct tipc_peer *self;
int best_member_cnt = dom_size(mon->peer_cnt) - 1;
@@ -623,7 +632,7 @@ int tipc_mon_create(struct net *net, int bearer_id)
self->is_up = true;
self->is_head = true;
INIT_LIST_HEAD(&self->list);
- setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
+ timer_setup(&mon->timer, mon_timeout, 0);
mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
mod_timer(&mon->timer, jiffies + mon->timer_intv);
return 0;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 6ef379f..1649d45 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -551,7 +551,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
return false;
if (msg_errcode(msg))
return false;
- *err = -TIPC_ERR_NO_NAME;
+ *err = TIPC_ERR_NO_NAME;
if (skb_linearize(skb))
return false;
msg = buf_msg(skb);
@@ -568,6 +568,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
msg_set_destnode(msg, dnode);
msg_set_destport(msg, dport);
*err = TIPC_OK;
+
+ if (!skb_cloned(skb))
+ return true;
+
+ /* Unclone buffer in case it was bundled */
+ if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
+ return false;
+
return true;
}
@@ -658,3 +666,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
}
kfree_skb(skb);
}
+
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ if (tipc_msg_reverse(tipc_own_addr(net), &skb, err))
+ __skb_queue_tail(xmitq, skb);
+}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c843fd2..cedf811 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
/*
* net/tipc/msg.h: Include file for TIPC message header routines
*
- * Copyright (c) 2000-2007, 2014-2015 Ericsson AB
+ * Copyright (c) 2000-2007, 2014-2017 Ericsson AB
* Copyright (c) 2005-2008, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -61,10 +61,14 @@ struct plist;
/*
* Payload message types
*/
-#define TIPC_CONN_MSG 0
-#define TIPC_MCAST_MSG 1
-#define TIPC_NAMED_MSG 2
-#define TIPC_DIRECT_MSG 3
+#define TIPC_CONN_MSG 0
+#define TIPC_MCAST_MSG 1
+#define TIPC_NAMED_MSG 2
+#define TIPC_DIRECT_MSG 3
+#define TIPC_GRP_MEMBER_EVT 4
+#define TIPC_GRP_BCAST_MSG 5
+#define TIPC_GRP_MCAST_MSG 6
+#define TIPC_GRP_UCAST_MSG 7
/*
* Internal message users
@@ -73,11 +77,13 @@ struct plist;
#define MSG_BUNDLER 6
#define LINK_PROTOCOL 7
#define CONN_MANAGER 8
+#define GROUP_PROTOCOL 9
#define TUNNEL_PROTOCOL 10
#define NAME_DISTRIBUTOR 11
#define MSG_FRAGMENTER 12
#define LINK_CONFIG 13
#define SOCK_WAKEUP 14 /* pseudo user */
+#define TOP_SRV 15 /* pseudo user */
/*
* Message header sizes
@@ -86,6 +92,7 @@ struct plist;
#define BASIC_H_SIZE 32 /* Basic payload message */
#define NAMED_H_SIZE 40 /* Named payload message */
#define MCAST_H_SIZE 44 /* Multicast payload message */
+#define GROUP_H_SIZE 44 /* Group payload message */
#define INT_H_SIZE 40 /* Internal messages */
#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
@@ -96,6 +103,7 @@ struct plist;
struct tipc_skb_cb {
u32 bytes_read;
+ u32 orig_member;
struct sk_buff *tail;
bool validated;
u16 chain_imp;
@@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m)
return msg_bits(m, 0, 0, 0x1ffff);
}
+static inline u32 msg_blocks(struct tipc_msg *m)
+{
+ return (msg_size(m) / 1024) + 1;
+}
+
static inline u32 msg_data_sz(struct tipc_msg *m)
{
return msg_size(m) - msg_hdr_sz(m);
@@ -251,6 +264,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 29, 0x7, n);
}
+static inline int msg_in_group(struct tipc_msg *m)
+{
+ int mtyp = msg_type(m);
+
+ return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG;
+}
+
+static inline bool msg_is_grp_evt(struct tipc_msg *m)
+{
+ return msg_type(m) == TIPC_GRP_MEMBER_EVT;
+}
+
static inline u32 msg_named(struct tipc_msg *m)
{
return msg_type(m) == TIPC_NAMED_MSG;
@@ -258,7 +283,10 @@ static inline u32 msg_named(struct tipc_msg *m)
static inline u32 msg_mcast(struct tipc_msg *m)
{
- return msg_type(m) == TIPC_MCAST_MSG;
+ int mtyp = msg_type(m);
+
+ return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) ||
+ (mtyp == TIPC_GRP_MCAST_MSG));
}
static inline u32 msg_connected(struct tipc_msg *m)
@@ -514,6 +542,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
#define DSC_RESP_MSG 1
/*
+ * Group protocol message types
+ */
+#define GRP_JOIN_MSG 0
+#define GRP_LEAVE_MSG 1
+#define GRP_ADV_MSG 2
+#define GRP_ACK_MSG 3
+#define GRP_RECLAIM_MSG 4
+#define GRP_REMIT_MSG 5
+
+/*
* Word 1
*/
static inline u32 msg_seq_gap(struct tipc_msg *m)
@@ -764,12 +802,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 16, 0xffff, n);
}
-static inline u32 msg_adv_win(struct tipc_msg *m)
+static inline u16 msg_adv_win(struct tipc_msg *m)
{
return msg_bits(m, 9, 0, 0xffff);
}
-static inline void msg_set_adv_win(struct tipc_msg *m, u32 n)
+static inline void msg_set_adv_win(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 9, 0, 0xffff, n);
}
@@ -794,6 +832,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 0, 0xffff, n);
}
+static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_remitted(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+/* Word 10
+ */
+static inline u16 msg_grp_evt(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x3);
+}
+
+static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
+{
+ msg_set_bits(m, 10, 0, 0x3, n);
+}
+
+static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x1);
+}
+
+static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
+{
+ msg_set_bits(m, 10, 0, 0x1, n);
+}
+
+static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 10, 16, 0xffff, n);
+}
+
static inline bool msg_peer_link_is_up(struct tipc_msg *m)
{
if (likely(msg_user(m) != LINK_PROTOCOL))
@@ -818,6 +918,8 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
bool tipc_msg_validate(struct sk_buff *skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
u32 hsize, u32 destnode);
struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bd0aac8..b3829bc 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -43,6 +43,7 @@
#include "bcast.h"
#include "addr.h"
#include "node.h"
+#include "group.h"
#include <net/genetlink.h>
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
@@ -596,18 +597,47 @@ not_found:
return ref;
}
-/**
- * tipc_nametbl_mc_translate - find multicast destinations
- *
- * Creates list of all local ports that overlap the given multicast address;
- * also determines if any off-node ports overlap.
- *
- * Note: Publications with a scope narrower than 'limit' are ignored.
- * (i.e. local node-scope publications mustn't receive messages arriving
- * from another node, even if the multcast link brought it here)
- *
- * Returns non-zero if any off-node ports overlap
- */
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+ struct list_head *dsts, int *dstcnt, u32 exclude,
+ bool all)
+{
+ u32 self = tipc_own_addr(net);
+ struct publication *publ;
+ struct name_info *info;
+ struct name_seq *seq;
+ struct sub_seq *sseq;
+
+ if (!tipc_in_scope(domain, self))
+ return false;
+
+ *dstcnt = 0;
+ rcu_read_lock();
+ seq = nametbl_find_seq(net, type);
+ if (unlikely(!seq))
+ goto exit;
+ spin_lock_bh(&seq->lock);
+ sseq = nameseq_find_subseq(seq, instance);
+ if (likely(sseq)) {
+ info = sseq->info;
+ list_for_each_entry(publ, &info->zone_list, zone_list) {
+ if (!tipc_in_scope(domain, publ->node))
+ continue;
+ if (publ->ref == exclude && publ->node == self)
+ continue;
+ tipc_dest_push(dsts, publ->node, publ->ref);
+ (*dstcnt)++;
+ if (all)
+ continue;
+ list_move_tail(&publ->zone_list, &info->zone_list);
+ break;
+ }
+ }
+ spin_unlock_bh(&seq->lock);
+exit:
+ rcu_read_unlock();
+ return !list_empty(dsts);
+}
+
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct list_head *dports)
{
@@ -634,7 +664,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
info = sseq->info;
list_for_each_entry(publ, &info->node_list, node_list) {
if (publ->scope <= limit)
- u32_push(dports, publ->ref);
+ tipc_dest_push(dports, 0, publ->ref);
}
if (info->cluster_list_size != info->node_list_size)
@@ -667,7 +697,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
spin_lock_bh(&seq->lock);
sseq = seq->sseqs + nameseq_locate_subseq(seq, lower);
stop = seq->sseqs + seq->first_free;
- for (; sseq->lower <= upper && sseq != stop; sseq++) {
+ for (; sseq != stop && sseq->lower <= upper; sseq++) {
info = sseq->info;
list_for_each_entry(publ, &info->zone_list, zone_list) {
if (tipc_in_scope(domain, publ->node))
@@ -679,6 +709,37 @@ exit:
rcu_read_unlock();
}
+/* tipc_nametbl_build_group - build list of communication group members
+ */
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ u32 type, u32 domain)
+{
+ struct sub_seq *sseq, *stop;
+ struct name_info *info;
+ struct publication *p;
+ struct name_seq *seq;
+
+ rcu_read_lock();
+ seq = nametbl_find_seq(net, type);
+ if (!seq)
+ goto exit;
+
+ spin_lock_bh(&seq->lock);
+ sseq = seq->sseqs;
+ stop = seq->sseqs + seq->first_free;
+ for (; sseq != stop; sseq++) {
+ info = sseq->info;
+ list_for_each_entry(p, &info->zone_list, zone_list) {
+ if (!tipc_in_scope(domain, p->node))
+ continue;
+ tipc_group_add_member(grp, p->node, p->ref);
+ }
+ }
+ spin_unlock_bh(&seq->lock);
+exit:
+ rcu_read_unlock();
+}
+
/*
* tipc_nametbl_publish - add name publication to network name tables
*/
@@ -1057,78 +1118,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-bool u32_find(struct list_head *l, u32 value)
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item;
+ u64 value = (u64)node << 32 | port;
+ struct tipc_dest *dst;
- list_for_each_entry(item, l, list) {
- if (item->value == value)
- return true;
+ list_for_each_entry(dst, l, list) {
+ if (dst->value != value)
+ continue;
+ return dst;
}
- return false;
+ return NULL;
}
-bool u32_push(struct list_head *l, u32 value)
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item;
+ u64 value = (u64)node << 32 | port;
+ struct tipc_dest *dst;
- list_for_each_entry(item, l, list) {
- if (item->value == value)
- return false;
- }
- item = kmalloc(sizeof(*item), GFP_ATOMIC);
- if (unlikely(!item))
+ if (tipc_dest_find(l, node, port))
return false;
- item->value = value;
- list_add(&item->list, l);
+ dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
+ if (unlikely(!dst))
+ return false;
+ dst->value = value;
+ list_add(&dst->list, l);
return true;
}
-u32 u32_pop(struct list_head *l)
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port)
{
- struct u32_item *item;
- u32 value = 0;
+ struct tipc_dest *dst;
if (list_empty(l))
- return 0;
- item = list_first_entry(l, typeof(*item), list);
- value = item->value;
- list_del(&item->list);
- kfree(item);
- return value;
+ return false;
+ dst = list_first_entry(l, typeof(*dst), list);
+ if (port)
+ *port = dst->port;
+ if (node)
+ *node = dst->node;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
}
-bool u32_del(struct list_head *l, u32 value)
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item, *tmp;
+ struct tipc_dest *dst;
- list_for_each_entry_safe(item, tmp, l, list) {
- if (item->value != value)
- continue;
- list_del(&item->list);
- kfree(item);
- return true;
- }
- return false;
+ dst = tipc_dest_find(l, node, port);
+ if (!dst)
+ return false;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
}
-void u32_list_purge(struct list_head *l)
+void tipc_dest_list_purge(struct list_head *l)
{
- struct u32_item *item, *tmp;
+ struct tipc_dest *dst, *tmp;
- list_for_each_entry_safe(item, tmp, l, list) {
- list_del(&item->list);
- kfree(item);
+ list_for_each_entry_safe(dst, tmp, l, list) {
+ list_del(&dst->list);
+ kfree(dst);
}
}
-int u32_list_len(struct list_head *l)
+int tipc_dest_list_len(struct list_head *l)
{
- struct u32_item *item;
+ struct tipc_dest *dst;
int i = 0;
- list_for_each_entry(item, l, list) {
+ list_for_each_entry(dst, l, list) {
i++;
}
return i;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 6ebdeb1..71926e4 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -40,6 +40,7 @@
struct tipc_subscription;
struct tipc_plist;
struct tipc_nlist;
+struct tipc_group;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
@@ -101,9 +102,14 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct list_head *dports);
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
u32 upper, u32 domain,
struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+ struct list_head *dsts, int *dstcnt, u32 exclude,
+ bool all);
struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
u32 upper, u32 scope, u32 port_ref,
u32 key);
@@ -120,16 +126,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
-struct u32_item {
+struct tipc_dest {
struct list_head list;
- u32 value;
+ union {
+ struct {
+ u32 port;
+ u32 node;
+ };
+ u64 value;
+ };
};
-bool u32_push(struct list_head *l, u32 value);
-u32 u32_pop(struct list_head *l);
-bool u32_find(struct list_head *l, u32 value);
-bool u32_del(struct list_head *l, u32 value);
-void u32_list_purge(struct list_head *l);
-int u32_list_len(struct list_head *l);
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
+void tipc_dest_list_purge(struct list_head *l);
+int tipc_dest_list_len(struct list_head *l);
#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 198dbc7..009a816 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -153,11 +153,11 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
bool delete);
static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
static void tipc_node_delete(struct tipc_node *node);
-static void tipc_node_timeout(unsigned long data);
+static void tipc_node_timeout(struct timer_list *t);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
static void tipc_node_put(struct tipc_node *node);
-static bool tipc_node_is_up(struct tipc_node *n);
+static bool node_is_up(struct tipc_node *n);
struct tipc_sock_conn {
u32 port;
@@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
goto exit;
}
tipc_node_get(n);
- setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n);
+ timer_setup(&n->timer, tipc_node_timeout, 0);
n->keepalive_intv = U32_MAX;
hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
/* tipc_node_timeout - handle expiration of node timer
*/
-static void tipc_node_timeout(unsigned long data)
+static void tipc_node_timeout(struct timer_list *t)
{
- struct tipc_node *n = (struct tipc_node *)data;
+ struct tipc_node *n = from_timer(n, t, timer);
struct tipc_link_entry *le;
struct sk_buff_head xmitq;
int bearer_id;
@@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
*slot1 = i;
}
- if (!tipc_node_is_up(n)) {
+ if (!node_is_up(n)) {
if (tipc_link_peer_is_down(l))
tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
@@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_sk_rcv(n->net, &le->inputq);
}
-static bool tipc_node_is_up(struct tipc_node *n)
+static bool node_is_up(struct tipc_node *n)
{
return n->active_links[0] != INVALID_BEARER_ID;
}
+bool tipc_node_is_up(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+ bool retval = false;
+
+ if (in_own_node(net, addr))
+ return true;
+
+ n = tipc_node_find(net, addr);
+ if (!n)
+ return false;
+ retval = node_is_up(n);
+ tipc_node_put(n);
+ return retval;
+}
+
void tipc_node_check_dest(struct net *net, u32 onode,
struct tipc_bearer *b,
u16 capabilities, u32 signature,
@@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
goto attr_msg_full;
- if (tipc_node_is_up(node))
+ if (node_is_up(node))
if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
goto attr_msg_full;
@@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
return 0;
}
+/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
+ * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
+ */
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb;
+ u32 selector, dnode;
+
+ while ((skb = __skb_dequeue(xmitq))) {
+ selector = msg_origport(buf_msg(skb));
+ dnode = msg_destnode(buf_msg(skb));
+ tipc_node_xmit_skb(net, skb, dnode, selector);
+ }
+ return 0;
+}
+
void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
{
struct sk_buff *txskb;
@@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
dst = n->addr;
if (in_own_node(net, dst))
continue;
- if (!tipc_node_is_up(n))
+ if (!node_is_up(n))
continue;
txskb = pskb_copy(skb, GFP_ATOMIC);
if (!txskb)
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 898c229..acd58d2 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -48,7 +48,8 @@ enum {
TIPC_BCAST_SYNCH = (1 << 1),
TIPC_BCAST_STATE_NACK = (1 << 2),
TIPC_BLOCK_FLOWCTL = (1 << 3),
- TIPC_BCAST_RCAST = (1 << 4)
+ TIPC_BCAST_RCAST = (1 << 4),
+ TIPC_MCAST_GROUPS = (1 << 5)
};
#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
@@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
int selector);
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list);
int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
@@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+bool tipc_node_is_up(struct net *net, u32 addr);
u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 3cd6402..acaef80 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -36,6 +36,8 @@
#include "server.h"
#include "core.h"
#include "socket.h"
+#include "addr.h"
+#include "msg.h"
#include <net/sock.h>
#include <linux/module.h>
@@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref)
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
sock_release(sock);
con->sock = NULL;
-
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
}
-
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
tipc_clean_outqueues(con);
kfree(con);
}
@@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con)
struct tipc_server *s = con->server;
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
- tipc_unregister_callbacks(con);
+ if (con->sock)
+ tipc_unregister_callbacks(con);
if (con->conid)
s->tipc_conn_release(con->conid, con->usr_data);
@@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con)
* are harmless for us here as we have already deleted this
* connection from server connection list.
*/
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
-
+ if (con->sock)
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
conn_put(con);
}
}
@@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
}
}
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+ u32 lower, u32 upper, int *conid)
+{
+ struct tipc_subscriber *scbr;
+ struct tipc_subscr sub;
+ struct tipc_server *s;
+ struct tipc_conn *con;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = TIPC_SUB_PORTS;
+ *(u32 *)&sub.usr_handle = port;
+
+ con = tipc_alloc_conn(tipc_topsrv(net));
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ s = con->server;
+ scbr = s->tipc_conn_new(*conid);
+ if (!scbr) {
+ tipc_close_conn(con);
+ return false;
+ }
+
+ con->usr_data = scbr;
+ con->sock = NULL;
+ s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
+ return true;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+ tipc_close_conn(con);
+ conn_put(con);
+}
+
+static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_sk_rcv(net, &evtq);
+}
+
static void tipc_send_to_sock(struct tipc_conn *con)
{
- int count = 0;
struct tipc_server *s = con->server;
struct outqueue_entry *e;
+ struct tipc_event *evt;
struct msghdr msg;
+ int count = 0;
int ret;
spin_lock_bh(&con->outqueue_lock);
while (test_bit(CF_CONNECTED, &con->flags)) {
- e = list_entry(con->outqueue.next, struct outqueue_entry,
- list);
+ e = list_entry(con->outqueue.next, struct outqueue_entry, list);
if ((struct list_head *) e == &con->outqueue)
break;
- spin_unlock_bh(&con->outqueue_lock);
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
+ spin_unlock_bh(&con->outqueue_lock);
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
+ if (con->sock) {
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
+ msg.msg_name = &e->dest;
+ msg.msg_namelen = sizeof(struct sockaddr_tipc);
+ }
+ ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
+ e->iov.iov_len);
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ goto out;
+ } else if (ret < 0) {
+ goto send_err;
+ }
+ } else {
+ evt = e->iov.iov_base;
+ tipc_send_kern_top_evt(s->net, evt);
}
-
/* Don't starve users filling buffers */
if (++count >= MAX_SEND_MSG_COUNT) {
cond_resched();
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 34f8055..2113c91 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -83,13 +83,16 @@ struct tipc_server {
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
struct sockaddr_tipc *addr, void *data, size_t len);
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+ u32 lower, u32 upper, int *conid);
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
+
/**
* tipc_conn_terminate - terminate connection with server
*
* Note: Must call it in process context since it might sleep
*/
void tipc_conn_terminate(struct tipc_server *s, int conid);
-
int tipc_server_start(struct tipc_server *s);
void tipc_server_stop(struct tipc_server *s);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index d50edd6..5d18c0c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -45,9 +45,10 @@
#include "socket.h"
#include "bcast.h"
#include "netlink.h"
+#include "group.h"
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
-#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */
+#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
@@ -61,6 +62,11 @@ enum {
TIPC_CONNECTING = TCP_SYN_SENT,
};
+struct sockaddr_pair {
+ struct sockaddr_tipc sock;
+ struct sockaddr_tipc member;
+};
+
/**
* struct tipc_sock - TIPC socket structure
* @sk: socket - interacts with 'port' and with user via the socket API
@@ -78,7 +84,7 @@ enum {
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links
- * @sent_unacked: # messages sent by socket, and not yet acked by peer
+ * @snt_unacked: # messages sent by socket, and not yet acked by peer
* @rcv_unacked: # messages read by user, but not yet acked back to peer
* @peer: 'connected' peer for dgram/rdm
* @node: hash table node
@@ -109,20 +115,22 @@ struct tipc_sock {
struct rhash_head node;
struct tipc_mc_method mc_method;
struct rcu_head rcu;
+ struct tipc_group *group;
};
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
bool kern);
-static void tipc_sk_timeout(unsigned long data);
+static void tipc_sk_timeout(struct timer_list *t);
static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
+static int tipc_sk_leave(struct tipc_sock *tsk);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
@@ -193,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk)
return tsk->snt_unacked > tsk->snd_win;
}
+static u16 tsk_blocks(int len)
+{
+ return ((len / FLOWCTL_BLK_SZ) + 1);
+}
+
/* tsk_blocks(): translate a buffer size in bytes to number of
* advertisable blocks, taking into account the ratio truesize(len)/len
* We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
@@ -451,9 +464,9 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
NAMED_H_SIZE, 0);
msg_set_origport(msg, tsk->portid);
- setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
+ timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
- sk->sk_backlog_rcv = tipc_backlog_rcv;
+ sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
@@ -559,13 +572,14 @@ static int tipc_release(struct socket *sock)
__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
sk->sk_shutdown = SHUTDOWN_MASK;
+ tipc_sk_leave(tsk);
tipc_sk_withdraw(tsk, 0, NULL);
sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
/* Reject any messages that accumulated in backlog queue */
release_sock(sk);
- u32_list_purge(&tsk->cong_links);
+ tipc_dest_list_purge(&tsk->cong_links);
tsk->cong_link_cnt = 0;
call_rcu(&tsk->rcu, tipc_sk_callback);
sock->sk = NULL;
@@ -601,7 +615,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
res = tipc_sk_withdraw(tsk, 0, NULL);
goto exit;
}
-
+ if (tsk->group) {
+ res = -EACCES;
+ goto exit;
+ }
if (uaddr_len < sizeof(struct sockaddr_tipc)) {
res = -EINVAL;
goto exit;
@@ -698,38 +715,41 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- u32 mask = 0;
+ struct tipc_group *grp = tsk->group;
+ u32 revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ revents |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ revents |= POLLHUP;
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
- mask |= POLLOUT;
+ revents |= POLLOUT;
/* fall thru' */
case TIPC_LISTEN:
case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= (POLLIN | POLLRDNORM);
+ revents |= POLLIN | POLLRDNORM;
break;
case TIPC_OPEN:
- if (!tsk->cong_link_cnt)
- mask |= POLLOUT;
- if (tipc_sk_type_connectionless(sk) &&
- (!skb_queue_empty(&sk->sk_receive_queue)))
- mask |= (POLLIN | POLLRDNORM);
+ if (!grp || tipc_group_size(grp))
+ if (!tsk->cong_link_cnt)
+ revents |= POLLOUT;
+ if (!tipc_sk_type_connectionless(sk))
+ break;
+ if (skb_queue_empty(&sk->sk_receive_queue))
+ break;
+ revents |= POLLIN | POLLRDNORM;
break;
case TIPC_DISCONNECTING:
- mask = (POLLIN | POLLRDNORM | POLLHUP);
+ revents = POLLIN | POLLRDNORM | POLLHUP;
break;
}
-
- return mask;
+ return revents;
}
/**
@@ -757,6 +777,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct tipc_nlist dsts;
int rc;
+ if (tsk->group)
+ return -EACCES;
+
/* Block or return if any destination link is congested */
rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
if (unlikely(rc))
@@ -794,6 +817,296 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
}
/**
+ * tipc_send_group_msg - send a message to a member in the group
+ * @net: network namespace
+ * @m: message to send
+ * @mb: group member
+ * @dnode: destination node
+ * @dport: destination port
+ * @dlen: total length of message data
+ */
+static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
+ struct msghdr *m, struct tipc_member *mb,
+ u32 dnode, u32 dport, int dlen)
+{
+ u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
+ struct tipc_mc_method *method = &tsk->mc_method;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct sk_buff_head pkts;
+ int mtu, rc;
+
+ /* Complete message header */
+ msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, dport);
+ msg_set_destnode(hdr, dnode);
+ msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
+
+ /* Build message as chain of buffers */
+ skb_queue_head_init(&pkts);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ if (unlikely(rc == -ELINKCONG)) {
+ tipc_dest_push(&tsk->cong_links, dnode, 0);
+ tsk->cong_link_cnt++;
+ }
+
+ /* Update send window */
+ tipc_group_update_member(mb, blks);
+
+ /* A broadcast sent within next EXPIRE period must follow same path */
+ method->rcast = true;
+ method->mandatory = true;
+ return dlen;
+}
+
+/**
+ * tipc_send_group_unicast - send message to a member in the group
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct net *net = sock_net(sk);
+ struct tipc_member *mb = NULL;
+ u32 node, port;
+ int rc;
+
+ node = dest->addr.id.node;
+ port = dest->addr.id.ref;
+ if (!port && !node)
+ return -EHOSTUNREACH;
+
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(&tsk->cong_links, node, 0) &&
+ !tipc_group_cong(grp, node, port, blks, &mb));
+ if (unlikely(rc))
+ return rc;
+
+ if (unlikely(!mb))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);
+
+ return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_anycast - send message to any member with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct list_head *cong_links = &tsk->cong_links;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_member *first = NULL;
+ struct tipc_member *mbr = NULL;
+ struct net *net = sock_net(sk);
+ u32 node, port, exclude;
+ u32 type, inst, domain;
+ struct list_head dsts;
+ int lookups = 0;
+ int dstcnt, rc;
+ bool cong;
+
+ INIT_LIST_HEAD(&dsts);
+
+ type = dest->addr.name.name.type;
+ inst = dest->addr.name.name.instance;
+ domain = addr_domain(net, dest->scope);
+ exclude = tipc_group_exclude(grp);
+
+ while (++lookups < 4) {
+ first = NULL;
+
+ /* Look for a non-congested destination member, if any */
+ while (1) {
+ if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+ &dstcnt, exclude, false))
+ return -EHOSTUNREACH;
+ tipc_dest_pop(&dsts, &node, &port);
+ cong = tipc_group_cong(grp, node, port, blks, &mbr);
+ if (!cong)
+ break;
+ if (mbr == first)
+ break;
+ if (!first)
+ first = mbr;
+ }
+
+ /* Start over if destination was not in member list */
+ if (unlikely(!mbr))
+ continue;
+
+ if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
+ break;
+
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(cong_links, node, 0) &&
+ !tipc_group_cong(grp, node, port,
+ blks, &mbr));
+ if (unlikely(rc))
+ return rc;
+
+ /* Send, unless destination disappeared while waiting */
+ if (likely(mbr))
+ break;
+ }
+
+ if (unlikely(lookups >= 4))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
+
+ return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_bcast - send message to all members in communication group
+ * @sk: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_nlist *dsts = tipc_group_dests(grp);
+ struct tipc_mc_method *method = &tsk->mc_method;
+ bool ack = method->mandatory && method->rcast;
+ int blks = tsk_blocks(MCAST_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ int mtu = tipc_bcast_get_mtu(net);
+ struct sk_buff_head pkts;
+ int rc = -EHOSTUNREACH;
+
+ if (!dsts->local && !dsts->remote)
+ return -EHOSTUNREACH;
+
+ /* Block or return if any destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
+ !tipc_group_bc_cong(grp, blks));
+ if (unlikely(rc))
+ return rc;
+
+ /* Complete message header */
+ if (dest) {
+ msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
+ msg_set_nameinst(hdr, dest->addr.name.name.instance);
+ } else {
+ msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
+ msg_set_nameinst(hdr, 0);
+ }
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, 0);
+ msg_set_destnode(hdr, 0);
+ msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
+
+ /* Avoid getting stuck with repeated forced replicasts */
+ msg_set_grp_bc_ack_req(hdr, ack);
+
+ /* Build message as chain of buffers */
+ skb_queue_head_init(&pkts);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
+ if (unlikely(rc))
+ return rc;
+
+ /* Update broadcast sequence number and send windows */
+ tipc_group_update_bc_members(tsk->group, blks, ack);
+
+ /* Broadcast link is now free to choose method for next broadcast */
+ method->mandatory = false;
+ method->expires = jiffies;
+
+ return dlen;
+}
+
+/**
+ * tipc_send_group_mcast - send message to all members with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct tipc_name_seq *seq = &dest->addr.nameseq;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct net *net = sock_net(sk);
+ u32 domain, exclude, dstcnt;
+ struct list_head dsts;
+
+ INIT_LIST_HEAD(&dsts);
+
+ if (seq->lower != seq->upper)
+ return -ENOTSUPP;
+
+ domain = addr_domain(net, dest->scope);
+ exclude = tipc_group_exclude(grp);
+ if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
+ &dsts, &dstcnt, exclude, true))
+ return -EHOSTUNREACH;
+
+ if (dstcnt == 1) {
+ tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ }
+
+ tipc_dest_list_purge(&dsts);
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+}
+
+/**
* tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
* @arrvq: queue with arriving messages, to be cloned after destination lookup
* @inputq: queue with cloned messages, delivered to socket after dest lookup
@@ -803,13 +1116,15 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq)
{
- struct tipc_msg *msg;
- struct list_head dports;
- u32 portid;
u32 scope = TIPC_CLUSTER_SCOPE;
- struct sk_buff_head tmpq;
- uint hsz;
+ u32 self = tipc_own_addr(net);
struct sk_buff *skb, *_skb;
+ u32 lower = 0, upper = ~0;
+ struct sk_buff_head tmpq;
+ u32 portid, oport, onode;
+ struct list_head dports;
+ struct tipc_msg *msg;
+ int user, mtyp, hsz;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
@@ -817,17 +1132,32 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
msg = buf_msg(skb);
+ user = msg_user(msg);
+ mtyp = msg_type(msg);
+ if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
+ spin_lock_bh(&inputq->lock);
+ if (skb_peek(arrvq) == skb) {
+ __skb_dequeue(arrvq);
+ __skb_queue_tail(inputq, skb);
+ }
+ refcount_dec(&skb->users);
+ spin_unlock_bh(&inputq->lock);
+ continue;
+ }
hsz = skb_headroom(skb) + msg_hdr_sz(msg);
-
- if (in_own_node(net, msg_orignode(msg)))
+ oport = msg_origport(msg);
+ onode = msg_orignode(msg);
+ if (onode == self)
scope = TIPC_NODE_SCOPE;
/* Create destination port list and message clones: */
- tipc_nametbl_mc_translate(net,
- msg_nametype(msg), msg_namelower(msg),
- msg_nameupper(msg), scope, &dports);
- portid = u32_pop(&dports);
- for (; portid; portid = u32_pop(&dports)) {
+ if (!msg_in_group(msg)) {
+ lower = msg_namelower(msg);
+ upper = msg_nameupper(msg);
+ }
+ tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
+ scope, &dports);
+ while (tipc_dest_pop(&dports, NULL, &portid)) {
_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
if (_skb) {
msg_set_destport(buf_msg(_skb), portid);
@@ -850,16 +1180,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
}
/**
- * tipc_sk_proto_rcv - receive a connection mng protocol message
+ * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
* @skb: pointer to message buffer.
*/
-static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
- struct sk_buff_head *xmitq)
+static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
- struct sock *sk = &tsk->sk;
- u32 onode = tsk_own_node(tsk);
struct tipc_msg *hdr = buf_msg(skb);
+ u32 onode = tsk_own_node(tsk);
+ struct sock *sk = &tsk->sk;
int mtyp = msg_type(hdr);
bool conn_cong;
@@ -931,6 +1261,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
struct list_head *clinks = &tsk->cong_links;
bool syn = !tipc_sk_type_connectionless(sk);
+ struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
@@ -941,18 +1272,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
return -EMSGSIZE;
+ if (likely(dest)) {
+ if (unlikely(m->msg_namelen < sizeof(*dest)))
+ return -EINVAL;
+ if (unlikely(dest->family != AF_TIPC))
+ return -EINVAL;
+ }
+
+ if (grp) {
+ if (!dest)
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_NAME)
+ return tipc_send_group_anycast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_ID)
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_MCAST)
+ return tipc_send_group_mcast(sock, m, dlen, timeout);
+ return -EINVAL;
+ }
+
if (unlikely(!dest)) {
dest = &tsk->peer;
if (!syn || dest->family != AF_TIPC)
return -EDESTADDRREQ;
}
- if (unlikely(m->msg_namelen < sizeof(*dest)))
- return -EINVAL;
-
- if (unlikely(dest->family != AF_TIPC))
- return -EINVAL;
-
if (unlikely(syn)) {
if (sk->sk_state == TIPC_LISTEN)
return -EPIPE;
@@ -985,7 +1329,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
msg_set_destport(hdr, dport);
if (unlikely(!dport && !dnode))
return -EHOSTUNREACH;
-
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
msg_set_type(hdr, TIPC_DIRECT_MSG);
@@ -996,7 +1339,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
}
/* Block or return if destination link is congested */
- rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(clinks, dnode, 0));
if (unlikely(rc))
return rc;
@@ -1008,7 +1352,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
- u32_push(clinks, dnode);
+ tipc_dest_push(clinks, dnode, 0);
tsk->cong_link_cnt++;
rc = 0;
}
@@ -1128,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
msg_set_lookup_scope(msg, 0);
msg_set_hdr_sz(msg, SHORT_H_SIZE);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
@@ -1142,26 +1486,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
}
/**
- * set_orig_addr - capture sender's address for received message
+ * tipc_sk_set_orig_addr - capture sender's address for received message
* @m: descriptor for message info
- * @msg: received message header
+ * @hdr: received message header
*
* Note: Address is not captured if not requested by receiver.
*/
-static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
+static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
{
- DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
+ DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
+ struct tipc_msg *hdr = buf_msg(skb);
- if (addr) {
- addr->family = AF_TIPC;
- addr->addrtype = TIPC_ADDR_ID;
- memset(&addr->addr, 0, sizeof(addr->addr));
- addr->addr.id.ref = msg_origport(msg);
- addr->addr.id.node = msg_orignode(msg);
- addr->addr.name.domain = 0; /* could leave uninitialized */
- addr->scope = 0; /* could leave uninitialized */
- m->msg_namelen = sizeof(struct sockaddr_tipc);
- }
+ if (!srcaddr)
+ return;
+
+ srcaddr->sock.family = AF_TIPC;
+ srcaddr->sock.addrtype = TIPC_ADDR_ID;
+ srcaddr->sock.addr.id.ref = msg_origport(hdr);
+ srcaddr->sock.addr.id.node = msg_orignode(hdr);
+ srcaddr->sock.addr.name.domain = 0;
+ srcaddr->sock.scope = 0;
+ m->msg_namelen = sizeof(struct sockaddr_tipc);
+
+ if (!msg_in_group(hdr))
+ return;
+
+ /* Group message users may also want to know sending member's id */
+ srcaddr->member.family = AF_TIPC;
+ srcaddr->member.addrtype = TIPC_ADDR_NAME;
+ srcaddr->member.addr.name.name.type = msg_nametype(hdr);
+ srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
+ srcaddr->member.addr.name.domain = 0;
+ m->msg_namelen = sizeof(*srcaddr);
}
/**
@@ -1318,11 +1674,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
size_t buflen, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_sock *tsk = tipc_sk(sk);
- struct sk_buff *skb;
- struct tipc_msg *hdr;
bool connected = !tipc_sk_type_connectionless(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct sk_buff_head xmitq;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ bool grp_evt;
long timeout;
/* Catch invalid receive requests */
@@ -1336,8 +1694,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
}
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ /* Step rcv queue to first msg with data or error; wait if necessary */
do {
- /* Look at first msg in receive queue; wait if necessary */
rc = tipc_wait_for_rcvmsg(sock, &timeout);
if (unlikely(rc))
goto exit;
@@ -1346,13 +1704,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
err = msg_errcode(hdr);
+ grp_evt = msg_is_grp_evt(hdr);
if (likely(dlen || err))
break;
tsk_advance_rx_queue(sk);
} while (1);
/* Collect msg meta data, including error code and rejected data */
- set_orig_addr(m, hdr);
+ tipc_sk_set_orig_addr(m, skb);
rc = tipc_sk_anc_data_recv(m, hdr, tsk);
if (unlikely(rc))
goto exit;
@@ -1372,15 +1731,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
+ /* Mark message as group event if applicable */
+ if (unlikely(grp_evt)) {
+ if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
+ m->msg_flags |= MSG_EOR;
+ m->msg_flags |= MSG_OOB;
+ copy = 0;
+ }
+
/* Caption of data or error code/rejected data was successful */
if (unlikely(flags & MSG_PEEK))
goto exit;
+ /* Send group flow control advertisement when applicable */
+ if (tsk->group && msg_in_group(hdr) && !grp_evt) {
+ skb_queue_head_init(&xmitq);
+ tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
+ msg_orignode(hdr), msg_origport(hdr),
+ &xmitq);
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
+ }
+
tsk_advance_rx_queue(sk);
+
if (likely(!connected))
goto exit;
- /* Send connection flow control ack when applicable */
+ /* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
@@ -1446,7 +1823,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, incl. error code and rejected data */
if (!copied) {
- set_orig_addr(m, hdr);
+ tipc_sk_set_orig_addr(m, skb);
rc = tipc_sk_anc_data_recv(m, hdr, tsk);
if (rc)
break;
@@ -1532,14 +1909,51 @@ static void tipc_sock_destruct(struct sock *sk)
__skb_queue_purge(&sk->sk_receive_queue);
}
+static void tipc_sk_proto_rcv(struct sock *sk,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_group *grp = tsk->group;
+ bool wakeup = false;
+
+ switch (msg_user(hdr)) {
+ case CONN_MANAGER:
+ tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
+ return;
+ case SOCK_WAKEUP:
+ tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+ tsk->cong_link_cnt--;
+ wakeup = true;
+ break;
+ case GROUP_PROTOCOL:
+ tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
+ break;
+ case TOP_SRV:
+ tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
+ skb, inputq, xmitq);
+ skb = NULL;
+ break;
+ default:
+ break;
+ }
+
+ if (wakeup)
+ sk->sk_write_space(sk);
+
+ kfree_skb(skb);
+}
+
/**
- * filter_connect - Handle all incoming messages for a connection-based socket
+ * tipc_filter_connect - Handle incoming message for a connection-based socket
* @tsk: TIPC socket
* @skb: pointer to message buffer. Set to NULL if buffer is consumed
*
* Returns true if everything ok, false otherwise
*/
-static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
@@ -1643,6 +2057,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = buf_msg(skb);
+ if (unlikely(msg_in_group(hdr)))
+ return sk->sk_rcvbuf;
+
if (unlikely(!msg_connected(hdr)))
return sk->sk_rcvbuf << msg_importance(hdr);
@@ -1653,7 +2070,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
}
/**
- * filter_rcv - validate incoming message
+ * tipc_sk_filter_rcv - validate incoming message
* @sk: socket
* @skb: pointer to message.
*
@@ -1662,99 +2079,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
*
* Called with socket lock already taken
*
- * Returns true if message was added to socket receive queue, otherwise false
*/
-static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *xmitq)
+static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
+ bool sk_conn = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = buf_msg(skb);
- unsigned int limit = rcvbuf_limit(sk, skb);
- int err = TIPC_OK;
- int usr = msg_user(hdr);
- u32 onode;
+ struct net *net = sock_net(sk);
+ struct sk_buff_head inputq;
+ int limit, err = TIPC_OK;
- if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
- tipc_sk_proto_rcv(tsk, skb, xmitq);
- return false;
- }
+ TIPC_SKB_CB(skb)->bytes_read = 0;
+ __skb_queue_head_init(&inputq);
+ __skb_queue_tail(&inputq, skb);
- if (unlikely(usr == SOCK_WAKEUP)) {
- onode = msg_orignode(hdr);
- kfree_skb(skb);
- u32_del(&tsk->cong_links, onode);
- tsk->cong_link_cnt--;
- sk->sk_write_space(sk);
- return false;
- }
+ if (unlikely(!msg_isdata(hdr)))
+ tipc_sk_proto_rcv(sk, &inputq, xmitq);
- /* Drop if illegal message type */
- if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
- kfree_skb(skb);
- return false;
- }
+ if (unlikely(grp))
+ tipc_group_filter_msg(grp, &inputq, xmitq);
- /* Reject if wrong message type for current socket state */
- if (tipc_sk_type_connectionless(sk)) {
- if (msg_connected(hdr)) {
+ /* Validate and add to receive buffer if there is space */
+ while ((skb = __skb_dequeue(&inputq))) {
+ hdr = buf_msg(skb);
+ limit = rcvbuf_limit(sk, skb);
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+ (!sk_conn && msg_connected(hdr)) ||
+ (!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
- goto reject;
- }
- } else if (unlikely(!filter_connect(tsk, skb))) {
- err = TIPC_ERR_NO_PORT;
- goto reject;
- }
+ else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+ err = TIPC_ERR_OVERLOAD;
- /* Reject message if there isn't room to queue it */
- if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
- err = TIPC_ERR_OVERLOAD;
- goto reject;
+ if (unlikely(err)) {
+ tipc_skb_reject(net, err, skb, xmitq);
+ err = TIPC_OK;
+ continue;
+ }
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ sk->sk_data_ready(sk);
}
-
- /* Enqueue message */
- TIPC_SKB_CB(skb)->bytes_read = 0;
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- skb_set_owner_r(skb, sk);
-
- sk->sk_data_ready(sk);
- return true;
-
-reject:
- if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
- __skb_queue_tail(xmitq, skb);
- return false;
}
/**
- * tipc_backlog_rcv - handle incoming message from backlog queue
+ * tipc_sk_backlog_rcv - handle incoming message from backlog queue
* @sk: socket
* @skb: message
*
* Caller must hold socket lock
- *
- * Returns 0
*/
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
- unsigned int truesize = skb->truesize;
+ unsigned int before = sk_rmem_alloc_get(sk);
struct sk_buff_head xmitq;
- u32 dnode, selector;
+ unsigned int added;
__skb_queue_head_init(&xmitq);
- if (likely(filter_rcv(sk, skb, &xmitq))) {
- atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
- return 0;
- }
+ tipc_sk_filter_rcv(sk, skb, &xmitq);
+ added = sk_rmem_alloc_get(sk) - before;
+ atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
- if (skb_queue_empty(&xmitq))
- return 0;
-
- /* Send response/rejected message */
- skb = __skb_dequeue(&xmitq);
- dnode = msg_destnode(buf_msg(skb));
- selector = msg_origport(buf_msg(skb));
- tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
+ /* Send pending response/rejected messages, if any */
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
return 0;
}
@@ -1786,7 +2175,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Add message directly to receive queue if possible */
if (!sock_owned_by_user(sk)) {
- filter_rcv(sk, skb, xmitq);
+ tipc_sk_filter_rcv(sk, skb, xmitq);
continue;
}
@@ -1833,14 +2222,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
spin_unlock_bh(&sk->sk_lock.slock);
}
/* Send pending response/rejected messages, if any */
- while ((skb = __skb_dequeue(&xmitq))) {
- dnode = msg_destnode(buf_msg(skb));
- tipc_node_xmit_skb(net, skb, dnode, dport);
- }
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
sock_put(sk);
continue;
}
-
/* No destination socket => dequeue skb if still there */
skb = tipc_skb_dequeue(inputq, dport);
if (!skb)
@@ -1903,28 +2288,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
int previous;
int res = 0;
+ if (destlen != sizeof(struct sockaddr_tipc))
+ return -EINVAL;
+
lock_sock(sk);
- /* DGRAM/RDM connect(), just save the destaddr */
- if (tipc_sk_type_connectionless(sk)) {
- if (dst->family == AF_UNSPEC) {
- memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
- } else if (destlen != sizeof(struct sockaddr_tipc)) {
- res = -EINVAL;
- } else {
- memcpy(&tsk->peer, dest, destlen);
- }
+ if (tsk->group) {
+ res = -EINVAL;
goto exit;
}
- /*
- * Reject connection attempt using multicast address
- *
- * Note: send_msg() validates the rest of the address fields,
- * so there's no need to do it here
- */
- if (dst->addrtype == TIPC_ADDR_MCAST) {
+ if (dst->family == AF_UNSPEC) {
+ memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
+ if (!tipc_sk_type_connectionless(sk))
+ res = -EINVAL;
+ goto exit;
+ } else if (dst->family != AF_TIPC) {
res = -EINVAL;
+ }
+ if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
+ res = -EINVAL;
+ if (res)
+ goto exit;
+
+ /* DGRAM/RDM connect(), just save the destaddr */
+ if (tipc_sk_type_connectionless(sk)) {
+ memcpy(&tsk->peer, dest, destlen);
goto exit;
}
@@ -2141,46 +2530,43 @@ static int tipc_shutdown(struct socket *sock, int how)
return res;
}
-static void tipc_sk_timeout(unsigned long data)
+static void tipc_sk_timeout(struct timer_list *t)
{
- struct tipc_sock *tsk = (struct tipc_sock *)data;
- struct sock *sk = &tsk->sk;
- struct sk_buff *skb = NULL;
- u32 peer_port, peer_node;
+ struct sock *sk = from_timer(sk, t, sk_timer);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 peer_node = tsk_peer_node(tsk);
u32 own_node = tsk_own_node(tsk);
+ u32 own_port = tsk->portid;
+ struct net *net = sock_net(sk);
+ struct sk_buff *skb = NULL;
bh_lock_sock(sk);
- if (!tipc_sk_connected(sk)) {
- bh_unlock_sock(sk);
+ if (!tipc_sk_connected(sk))
+ goto exit;
+
+ /* Try again later if socket is busy */
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
goto exit;
}
- peer_port = tsk_peer_port(tsk);
- peer_node = tsk_peer_node(tsk);
if (tsk->probe_unacked) {
- if (!sock_owned_by_user(sk)) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
- tsk_peer_port(tsk));
- sk->sk_state_change(sk);
- } else {
- /* Try again later */
- sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
- }
-
- bh_unlock_sock(sk);
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, peer_node, peer_port);
+ sk->sk_state_change(sk);
goto exit;
}
-
- skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
- INT_H_SIZE, 0, peer_node, own_node,
- peer_port, tsk->portid, TIPC_OK);
+ /* Send new probe */
+ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+ peer_node, own_node, peer_port, own_port,
+ TIPC_OK);
tsk->probe_unacked = true;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+exit:
bh_unlock_sock(sk);
if (skb)
- tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
-exit:
+ tipc_node_xmit_skb(net, skb, peer_node, own_port);
sock_put(sk);
}
@@ -2345,6 +2731,58 @@ void tipc_sk_rht_destroy(struct net *net)
rhashtable_destroy(&tn->sk_rht);
}
+static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
+{
+ struct net *net = sock_net(&tsk->sk);
+ u32 domain = addr_domain(net, mreq->scope);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct tipc_name_seq seq;
+ int rc;
+
+ if (mreq->type < TIPC_RESERVED_TYPES)
+ return -EACCES;
+ if (grp)
+ return -EACCES;
+ grp = tipc_group_create(net, tsk->portid, mreq);
+ if (!grp)
+ return -ENOMEM;
+ tsk->group = grp;
+ msg_set_lookup_scope(hdr, mreq->scope);
+ msg_set_nametype(hdr, mreq->type);
+ msg_set_dest_droppable(hdr, true);
+ seq.type = mreq->type;
+ seq.lower = mreq->instance;
+ seq.upper = seq.lower;
+ tipc_nametbl_build_group(net, grp, mreq->type, domain);
+ rc = tipc_sk_publish(tsk, mreq->scope, &seq);
+ if (rc) {
+ tipc_group_delete(net, grp);
+ tsk->group = NULL;
+ }
+
+ /* Eliminate any risk that a broadcast overtakes the sent JOIN */
+ tsk->mc_method.rcast = true;
+ tsk->mc_method.mandatory = true;
+ return rc;
+}
+
+static int tipc_sk_leave(struct tipc_sock *tsk)
+{
+ struct net *net = sock_net(&tsk->sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_name_seq seq;
+ int scope;
+
+ if (!grp)
+ return -EINVAL;
+ tipc_group_self(grp, &seq, &scope);
+ tipc_group_delete(net, grp);
+ tsk->group = NULL;
+ tipc_sk_withdraw(tsk, scope, &seq);
+ return 0;
+}
+
/**
* tipc_setsockopt - set socket option
* @sock: socket structure
@@ -2363,6 +2801,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group_req mreq;
u32 value = 0;
int res = 0;
@@ -2378,9 +2817,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_CONN_TIMEOUT:
if (ol < sizeof(value))
return -EINVAL;
- res = get_user(value, (u32 __user *)ov);
- if (res)
- return res;
+ if (get_user(value, (u32 __user *)ov))
+ return -EFAULT;
+ break;
+ case TIPC_GROUP_JOIN:
+ if (ol < sizeof(mreq))
+ return -EINVAL;
+ if (copy_from_user(&mreq, ov, sizeof(mreq)))
+ return -EFAULT;
break;
default:
if (ov || ol)
@@ -2413,6 +2857,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
tsk->mc_method.rcast = true;
tsk->mc_method.mandatory = true;
break;
+ case TIPC_GROUP_JOIN:
+ res = tipc_sk_join(tsk, &mreq);
+ break;
+ case TIPC_GROUP_LEAVE:
+ res = tipc_sk_leave(tsk);
+ break;
default:
res = -EINVAL;
}
@@ -2440,7 +2890,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- int len;
+ struct tipc_name_seq seq;
+ int len, scope;
u32 value;
int res;
@@ -2474,6 +2925,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
case TIPC_SOCK_RECVQ_DEPTH:
value = skb_queue_len(&sk->sk_receive_queue);
break;
+ case TIPC_GROUP_JOIN:
+ seq.type = 0;
+ if (tsk->group)
+ tipc_group_self(tsk->group, &seq, &scope);
+ value = seq.type;
+ break;
default:
res = -EINVAL;
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index be3d9e3..251065d 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -133,9 +133,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
node);
}
-static void tipc_subscrp_timeout(unsigned long data)
+static void tipc_subscrp_timeout(struct timer_list *t)
{
- struct tipc_subscription *sub = (struct tipc_subscription *)data;
+ struct tipc_subscription *sub = from_timer(sub, t, timer);
struct tipc_subscriber *subscriber = sub->subscriber;
spin_lock_bh(&subscriber->lock);
@@ -303,7 +303,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
tipc_subscrb_get(subscriber);
spin_unlock_bh(&subscriber->lock);
- setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
+ timer_setup(&sub->timer, tipc_subscrp_timeout, 0);
timeout = htohl(sub->evt.s.timeout, swap);
if (timeout != TIPC_WAIT_FOREVER)
diff --git a/net/unix/Makefile b/net/unix/Makefile
index b663c60..ffd0a275 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux unix domain socket layer.
#
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7f46bab..a9ee634 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
*/
case SOCK_RAW:
sock->type = SOCK_DGRAM;
+ /* fall through */
case SOCK_DGRAM:
sock->ops = &unix_dgram_ops;
break;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 4d96797..384c84e 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
err = -ENOENT;
if (sk == NULL)
goto out_nosk;
+ if (!net_eq(sock_net(sk), net))
+ goto out;
err = sock_diag_check_cookie(sk, req->udiag_cookie);
if (err)
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index a24369d..970f964 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -15,6 +15,16 @@ config VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vsock. If unsure, say N.
+config VSOCKETS_DIAG
+ tristate "Virtual Sockets monitoring interface"
+ depends on VSOCKETS
+ default y
+ help
+ Support for PF_VSOCK sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
+
+ Enable this module so userspace applications can query open sockets.
+
config VMWARE_VMCI_VSOCKETS
tristate "VMware VMCI transport for Virtual Sockets"
depends on VSOCKETS && VMWARE_VMCI
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index e63d574..7c6f9a0 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,4 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
@@ -6,6 +8,8 @@ obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
+vsock_diag-y += diag.o
+
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfc8c51e..5d28abf 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,7 +36,7 @@
* not support simultaneous connects (two "client" sockets connecting).
*
* - "Server" sockets are referred to as listener sockets throughout this
- * implementation because they are in the VSOCK_SS_LISTEN state. When a
+ * implementation because they are in the TCP_LISTEN state. When a
* connection request is received (the second kind of socket mentioned above),
* we create a new socket and refer to it as a pending socket. These pending
* sockets are placed on the pending connection list of the listener socket.
@@ -82,6 +82,15 @@
* argument, we must ensure the reference count is increased to ensure the
* socket isn't freed before the function is run; the deferred function will
* then drop the reference.
+ *
+ * - sk->sk_state uses the TCP state constants because they are widely used by
+ * other address families and exposed to userspace tools like ss(8):
+ *
+ * TCP_CLOSE - unconnected
+ * TCP_SYN_SENT - connecting
+ * TCP_ESTABLISHED - connected
+ * TCP_CLOSING - disconnecting
+ * TCP_LISTEN - listening
*/
#include <linux/types.h>
@@ -153,7 +162,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
* vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
* mods with VSOCK_HASH_SIZE to ensure this.
*/
-#define VSOCK_HASH_SIZE 251
#define MAX_PORT_RETRIES 24
#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)
@@ -168,9 +176,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
#define vsock_connected_sockets_vsk(vsk) \
vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
-static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
-static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
-static DEFINE_SPINLOCK(vsock_table_lock);
+struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(vsock_bind_table);
+struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+EXPORT_SYMBOL_GPL(vsock_connected_table);
+DEFINE_SPINLOCK(vsock_table_lock);
+EXPORT_SYMBOL_GPL(vsock_table_lock);
/* Autobind this socket to the local address if necessary. */
static int vsock_auto_bind(struct vsock_sock *vsk)
@@ -184,7 +195,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk)
return __vsock_bind(sk, &local_addr);
}
-static void vsock_init_tables(void)
+static int __init vsock_init_tables(void)
{
int i;
@@ -193,6 +204,7 @@ static void vsock_init_tables(void)
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
INIT_LIST_HEAD(&vsock_connected_table[i]);
+ return 0;
}
static void __vsock_insert_bound(struct list_head *list,
@@ -248,16 +260,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
return NULL;
}
-static bool __vsock_in_bound_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->bound_table);
-}
-
-static bool __vsock_in_connected_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->connected_table);
-}
-
static void vsock_insert_unbound(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
@@ -485,7 +487,7 @@ void vsock_pending_work(struct work_struct *work)
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
- sk->sk_state = SS_FREE;
+ sk->sk_state = TCP_CLOSE;
out:
release_sock(sk);
@@ -625,7 +627,6 @@ struct sock *__vsock_create(struct net *net,
sk->sk_destruct = vsock_sk_destruct;
sk->sk_backlog_rcv = vsock_queue_rcv_skb;
- sk->sk_state = 0;
sock_reset_flag(sk, SOCK_DONE);
INIT_LIST_HEAD(&vsk->bound_table);
@@ -899,7 +900,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
/* Listening sockets that have connections in their accept
* queue can be read.
*/
- if (sk->sk_state == VSOCK_SS_LISTEN
+ if (sk->sk_state == TCP_LISTEN
&& !vsock_is_accept_queue_empty(sk))
mask |= POLLIN | POLLRDNORM;
@@ -928,7 +929,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
}
/* Connected sockets that can produce data can be written. */
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
bool space_avail_now = false;
int ret = transport->notify_poll_out(
@@ -950,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
* POLLOUT|POLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
- if (sk->sk_state == SS_UNCONNECTED) {
+ if (sk->sk_state == TCP_CLOSE) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= POLLOUT | POLLWRNORM;
@@ -1120,9 +1121,9 @@ static void vsock_connect_timeout(struct work_struct *work)
sk = sk_vsock(vsk);
lock_sock(sk);
- if (sk->sk_state == SS_CONNECTING &&
+ if (sk->sk_state == TCP_SYN_SENT &&
(sk->sk_shutdown != SHUTDOWN_MASK)) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
cancel = 1;
@@ -1168,7 +1169,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
err = -EALREADY;
break;
default:
- if ((sk->sk_state == VSOCK_SS_LISTEN) ||
+ if ((sk->sk_state == TCP_LISTEN) ||
vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
err = -EINVAL;
goto out;
@@ -1191,7 +1192,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
- sk->sk_state = SS_CONNECTING;
+ sk->sk_state = TCP_SYN_SENT;
err = transport->connect(vsk);
if (err < 0)
@@ -1211,7 +1212,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+ while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection
@@ -1234,13 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
@@ -1251,7 +1252,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (sk->sk_err) {
err = -sk->sk_err;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
} else {
err = 0;
@@ -1284,7 +1285,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
goto out;
}
- if (listener->sk_state != VSOCK_SS_LISTEN) {
+ if (listener->sk_state != TCP_LISTEN) {
err = -EINVAL;
goto out;
}
@@ -1374,7 +1375,7 @@ static int vsock_listen(struct socket *sock, int backlog)
}
sk->sk_max_ack_backlog = backlog;
- sk->sk_state = VSOCK_SS_LISTEN;
+ sk->sk_state = TCP_LISTEN;
err = 0;
@@ -1554,7 +1555,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Callers should not provide a destination with stream sockets. */
if (msg->msg_namelen) {
- err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+ err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out;
}
@@ -1565,7 +1566,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
- if (sk->sk_state != SS_CONNECTED ||
+ if (sk->sk_state != TCP_ESTABLISHED ||
!vsock_addr_bound(&vsk->local_addr)) {
err = -ENOTCONN;
goto out;
@@ -1689,7 +1690,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
lock_sock(sk);
- if (sk->sk_state != SS_CONNECTED) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
/* Recvmsg is supposed to return 0 if a peer performs an
* orderly shutdown. Differentiate between that case and when a
* peer has not connected or a local shutdown occured with the
@@ -1957,8 +1958,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
vsock_proto.owner = owner;
transport = t;
- vsock_init_tables();
-
vsock_device.minor = MISC_DYNAMIC_MINOR;
err = misc_register(&vsock_device);
if (err) {
@@ -2019,6 +2018,8 @@ const struct vsock_transport *vsock_core_get_transport(void)
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
+module_init(vsock_init_tables);
+
MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.2.0-k");
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
new file mode 100644
index 0000000..31b5676
--- /dev/null
+++ b/net/vmw_vsock/diag.c
@@ -0,0 +1,186 @@
+/*
+ * vsock sock_diag(7) module
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/vm_sockets_diag.h>
+#include <net/af_vsock.h>
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ u32 portid, u32 seq, u32 flags)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+ struct vsock_diag_msg *rep;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+ flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rep = nlmsg_data(nlh);
+ rep->vdiag_family = AF_VSOCK;
+
+ /* Lock order dictates that sk_lock is acquired before
+ * vsock_table_lock, so we cannot lock here. Simply don't take
+ * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is
+ * held.
+ */
+ rep->vdiag_type = sk->sk_type;
+ rep->vdiag_state = sk->sk_state;
+ rep->vdiag_shutdown = sk->sk_shutdown;
+ rep->vdiag_src_cid = vsk->local_addr.svm_cid;
+ rep->vdiag_src_port = vsk->local_addr.svm_port;
+ rep->vdiag_dst_cid = vsk->remote_addr.svm_cid;
+ rep->vdiag_dst_port = vsk->remote_addr.svm_port;
+ rep->vdiag_ino = sock_i_ino(sk);
+
+ sock_diag_save_cookie(sk, rep->vdiag_cookie);
+
+ return 0;
+}
+
+static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct vsock_diag_req *req;
+ struct vsock_sock *vsk;
+ unsigned int bucket;
+ unsigned int last_i;
+ unsigned int table;
+ struct net *net;
+ unsigned int i;
+
+ req = nlmsg_data(cb->nlh);
+ net = sock_net(skb->sk);
+
+ /* State saved between calls: */
+ table = cb->args[0];
+ bucket = cb->args[1];
+ i = last_i = cb->args[2];
+
+ /* TODO VMCI pending sockets? */
+
+ spin_lock_bh(&vsock_table_lock);
+
+ /* Bind table (locally created sockets) */
+ if (table == 0) {
+ while (bucket < ARRAY_SIZE(vsock_bind_table)) {
+ struct list_head *head = &vsock_bind_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, bound_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_bind;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_bind;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_bind:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+ table++;
+ bucket = 0;
+ }
+
+ /* Connected table (accepted connections) */
+ while (bucket < ARRAY_SIZE(vsock_connected_table)) {
+ struct list_head *head = &vsock_connected_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, connected_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ /* Skip sockets we've already seen above */
+ if (__vsock_in_bound_table(vsk))
+ continue;
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_connected;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_connected;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_connected:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+done:
+ spin_unlock_bh(&vsock_table_lock);
+
+ cb->args[0] = table;
+ cb->args[1] = bucket;
+ cb->args[2] = i;
+
+ return skb->len;
+}
+
+static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct vsock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = vsock_diag_dump,
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler vsock_diag_handler = {
+ .family = AF_VSOCK,
+ .dump = vsock_diag_handler_dump,
+};
+
+static int __init vsock_diag_init(void)
+{
+ return sock_diag_register(&vsock_diag_handler);
+}
+
+static void __exit vsock_diag_exit(void)
+{
+ sock_diag_unregister(&vsock_diag_handler);
+}
+
+module_init(vsock_diag_init);
+module_exit(vsock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
+ 40 /* AF_VSOCK */);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 14ed5a3..5583df7 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -310,11 +310,15 @@ static void hvs_close_connection(struct vmbus_channel *chan)
struct sock *sk = get_per_channel_state(chan);
struct vsock_sock *vsk = vsock_sk(sk);
- sk->sk_state = SS_UNCONNECTED;
+ lock_sock(sk);
+
+ sk->sk_state = TCP_CLOSE;
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
sk->sk_state_change(sk);
+
+ release_sock(sk);
}
static void hvs_open_connection(struct vmbus_channel *chan)
@@ -344,8 +348,9 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if (!sk)
return;
- if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) ||
- (!conn_from_host && sk->sk_state != SS_CONNECTING))
+ lock_sock(sk);
+ if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
+ (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
goto out;
if (conn_from_host) {
@@ -357,7 +362,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if (!new)
goto out;
- new->sk_state = SS_CONNECTING;
+ new->sk_state = TCP_SYN_SENT;
vnew = vsock_sk(new);
hvs_new = vnew->trans;
hvs_new->chan = chan;
@@ -384,7 +389,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
if (conn_from_host) {
- new->sk_state = SS_CONNECTED;
+ new->sk_state = TCP_ESTABLISHED;
sk->sk_ack_backlog++;
hvs_addr_init(&vnew->local_addr, if_type);
@@ -395,11 +400,9 @@ static void hvs_open_connection(struct vmbus_channel *chan)
vsock_insert_connected(vnew);
- lock_sock(sk);
vsock_enqueue_accept(sk, new);
- release_sock(sk);
} else {
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsock_sk(sk));
@@ -410,6 +413,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
out:
/* Release refcnt obtained when we called vsock_find_bound_socket() */
sock_put(sk);
+
+ release_sock(sk);
}
static u32 hvs_get_local_cid(void)
@@ -476,13 +481,21 @@ out:
static void hvs_release(struct vsock_sock *vsk)
{
+ struct sock *sk = sk_vsock(vsk);
struct hvsock *hvs = vsk->trans;
- struct vmbus_channel *chan = hvs->chan;
+ struct vmbus_channel *chan;
+
+ lock_sock(sk);
+
+ sk->sk_state = SS_DISCONNECTING;
+ vsock_remove_sock(vsk);
+
+ release_sock(sk);
+ chan = hvs->chan;
if (chan)
hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
- vsock_remove_sock(vsk);
}
static void hvs_destruct(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 403d86e..8e03bd3 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
lock_sock(sk);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index edba7ab..3ae3a33 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
if (vsk->close_work_scheduled &&
@@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
{
struct sock *sk = &vsk->sk;
- if (!(sk->sk_state == SS_CONNECTED ||
- sk->sk_state == SS_DISCONNECTING))
+ if (!(sk->sk_state == TCP_ESTABLISHED ||
+ sk->sk_state == TCP_CLOSING))
return true;
/* Already received SHUTDOWN from peer, reply with RST */
@@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk,
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RESPONSE:
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk,
destroy:
virtio_transport_reset(vsk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk,
vsk->peer_shutdown |= SEND_SHUTDOWN;
if (vsk->peer_shutdown == SHUTDOWN_MASK &&
vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
if (le32_to_cpu(pkt->hdr.flags))
sk->sk_state_change(sk);
break;
@@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
lock_sock_nested(child, SINGLE_DEPTH_NESTING);
- child->sk_state = SS_CONNECTED;
+ child->sk_state = TCP_ESTABLISHED;
vchild = vsock_sk(child);
vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
@@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
sk->sk_write_space(sk);
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
virtio_transport_recv_listen(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
virtio_transport_recv_connecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
virtio_transport_recv_connected(sk, pkt);
break;
- case SS_DISCONNECTING:
+ case TCP_CLOSING:
virtio_transport_recv_disconnecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 0206155..391775e 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -742,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
/* The local context ID may be out of date, update it. */
vsk->local_addr.svm_cid = dst.svm_cid;
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vmci_trans(vsk)->notify_ops->handle_notify_pkt(
sk, pkt, true, &dst, &src,
&bh_process_pkt);
@@ -800,7 +800,9 @@ static void vmci_transport_handle_detach(struct sock *sk)
* left in our consume queue.
*/
if (vsock_stream_has_data(vsk) <= 0) {
- if (sk->sk_state == SS_CONNECTING) {
+ sk->sk_state = TCP_CLOSE;
+
+ if (sk->sk_state == TCP_SYN_SENT) {
/* The peer may detach from a queue pair while
* we are still in the connecting state, i.e.,
* if the peer VM is killed after attaching to
@@ -809,12 +811,10 @@ static void vmci_transport_handle_detach(struct sock *sk)
* event like a reset.
*/
- sk->sk_state = SS_UNCONNECTED;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
return;
}
- sk->sk_state = SS_UNCONNECTED;
}
sk->sk_state_change(sk);
}
@@ -882,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
vmci_transport_recv_listen(sk, pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
/* Processing of pending connections for servers goes through
* the listening socket, so see vmci_transport_recv_listen()
* for that path.
*/
vmci_transport_recv_connecting_client(sk, pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
vmci_transport_recv_connected(sk, pkt);
break;
default:
@@ -941,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
switch (pending->sk_state) {
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
err = vmci_transport_recv_connecting_server(sk,
pending,
pkt);
@@ -1071,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_add_pending(sk, pending);
sk->sk_ack_backlog++;
- pending->sk_state = SS_CONNECTING;
+ pending->sk_state = TCP_SYN_SENT;
vmci_trans(vpending)->produce_size =
vmci_trans(vpending)->consume_size = qp_size;
vmci_trans(vpending)->queue_pair_size = qp_size;
@@ -1196,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener,
* the socket will be valid until it is removed from the queue.
*
* If we fail sending the attach below, we remove the socket from the
- * connected list and move the socket to SS_UNCONNECTED before
+ * connected list and move the socket to TCP_CLOSE before
* releasing the lock, so a pending slow path processing of an incoming
* packet will not see the socket in the connected state in that case.
*/
- pending->sk_state = SS_CONNECTED;
+ pending->sk_state = TCP_ESTABLISHED;
vsock_insert_connected(vpending);
@@ -1231,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
destroy:
pending->sk_err = skerr;
- pending->sk_state = SS_UNCONNECTED;
+ pending->sk_state = TCP_CLOSE;
/* As long as we drop our reference, all necessary cleanup will handle
* when the cleanup function drops its reference and our destruct
* implementation is called. Note that since the listen handler will
@@ -1269,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
* accounting (it can already be found since it's in the bound
* table).
*/
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -1337,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
destroy:
vmci_transport_send_reset(sk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -1525,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
break;
@@ -1789,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
err = vmci_transport_send_conn_request(
sk, vmci_trans(vsk)->queue_pair_size);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
} else {
@@ -1799,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
sk, vmci_trans(vsk)->queue_pair_size,
supported_proto_versions);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 1406db4..41fb427 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!send_waiting_read(sk, 1))
return -1;
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index f3a0afc..0cc84f2 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vsock_block_update_write_window(sk);
*data_ready_now = false;
}
diff --git a/net/wimax/Makefile b/net/wimax/Makefile
index 8f1510d..eb2db0d 100644
--- a/net/wimax/Makefile
+++ b/net/wimax/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_WIMAX) += wimax.o
diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore
index c33451b..61cbc30 100644
--- a/net/wireless/.gitignore
+++ b/net/wireless/.gitignore
@@ -1 +1,2 @@
-regdb.c
+shipped-certs.c
+extra-certs.c
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6c60612..da91bb5 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -19,6 +19,7 @@ config WEXT_PRIV
config CFG80211
tristate "cfg80211 - wireless configuration API"
depends on RFKILL || !RFKILL
+ select FW_LOADER
---help---
cfg80211 is the Linux wireless LAN (802.11) configuration API.
Enable this if you have a wireless device.
@@ -82,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS
you are a wireless researcher and are working in a controlled
and approved environment by your local regulatory agency.
+config CFG80211_REQUIRE_SIGNED_REGDB
+ bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS
+ default y
+ select SYSTEM_DATA_VERIFICATION
+ help
+ Require that in addition to the "regulatory.db" file a
+ "regulatory.db.p7s" can be loaded with a valid PKCS#7
+ signature for the regulatory.db file made by one of the
+ keys in the certs/ directory.
+
+config CFG80211_USE_KERNEL_REGDB_KEYS
+ bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS
+ default y
+ depends on CFG80211_REQUIRE_SIGNED_REGDB
+ help
+ Allow the regulatory database to be signed by one of the keys for
+ which certificates are part of the kernel sources
+ (in net/wireless/certs/).
+
+ This is currently only Seth Forshee's key, who is the regulatory
+ database maintainer.
+
+config CFG80211_EXTRA_REGDB_KEYDIR
+ string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS
+ depends on CFG80211_REQUIRE_SIGNED_REGDB
+ help
+ If selected, point to a directory with DER-encoded X.509
+ certificates like in the kernel sources (net/wireless/certs/)
+ that shall be accepted for a signed regulatory database.
+
config CFG80211_REG_CELLULAR_HINTS
bool "cfg80211 regulatory support for cellular base station hints"
depends on CFG80211_CERTIFICATION_ONUS
@@ -139,35 +170,14 @@ config CFG80211_DEBUGFS
If unsure, say N.
-config CFG80211_INTERNAL_REGDB
- bool "use statically compiled regulatory rules database" if EXPERT
- default n
- depends on CFG80211
- ---help---
- This option generates an internal data structure representing
- the wireless regulatory rules described in net/wireless/db.txt
- and includes code to query that database. This is an alternative
- to using CRDA for defining regulatory rules for the kernel.
-
- Using this option requires some parsing of the db.txt at build time,
- the parser will be upkept with the latest wireless-regdb updates but
- older wireless-regdb formats will be ignored. The parser may later
- be replaced to avoid issues with conflicts on versions of
- wireless-regdb.
-
- For details see:
-
- http://wireless.kernel.org/en/developers/Regulatory
-
- Most distributions have a CRDA package. So if unsure, say N.
-
config CFG80211_CRDA_SUPPORT
- bool "support CRDA" if CFG80211_INTERNAL_REGDB
+ bool "support CRDA" if EXPERT
default y
depends on CFG80211
help
You should enable this option unless you know for sure you have no
- need for it, for example when using internal regdb (above.)
+ need for it, for example when using internal regdb (above) or the
+ database loaded as a firmware file.
If unsure, say Y.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d06e501..278d979 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CFG80211) += cfg80211.o
obj-$(CONFIG_LIB80211) += lib80211.o
obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
@@ -14,11 +15,27 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
cfg80211-$(CONFIG_OF) += of.o
cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
-cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
CFLAGS_trace.o := -I$(src)
-$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk
- @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@
+cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o
+ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
+cfg80211-y += extra-certs.o
+endif
-clean-files := regdb.c
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+ @$(kecho) " GEN $@"
+ @echo '#include "reg.h"' > $@
+ @echo 'const u8 shipped_regdb_certs[] = {' >> $@
+ @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
+ @echo '};' >> $@
+ @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
+ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
+ @$(kecho) " GEN $@"
+ @echo '#include "reg.h"' > $@
+ @echo 'const u8 extra_regdb_certs[] = {' >> $@
+ @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
+ @echo '};' >> $@
+ @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index 25666d3..6368217 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/cfg80211.h>
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
new file mode 100644
index 0000000..c6f8f9d
--- /dev/null
+++ b/net/wireless/certs/sforshee.x509
Binary files differ
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index b8aa5a7..a488599 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* This file contains helper code to handle channel
* settings and keeping track of what is possible at
@@ -464,7 +465,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
struct ieee80211_channel *chan)
{
int width;
- u32 cf_offset, freq;
+ u32 freq;
if (chandef->chan->center_freq == chan->center_freq)
return true;
@@ -473,8 +474,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
if (width <= 20)
return false;
- cf_offset = width / 2 - 10;
-
for (freq = chandef->center_freq1 - width / 2 + 10;
freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
if (chan->center_freq == freq)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7b33e8c..fdde0d9 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1384,7 +1384,7 @@ out_fail_sysfs:
out_fail_pernet:
return err;
}
-subsys_initcall(cfg80211_init);
+fs_initcall(cfg80211_init);
static void __exit cfg80211_exit(void)
{
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6e809325..d2f7e8b 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Wireless configuration interface internals.
*
@@ -216,6 +217,7 @@ enum cfg80211_event_type {
EVENT_DISCONNECTED,
EVENT_IBSS_JOINED,
EVENT_STOPPED,
+ EVENT_PORT_AUTHORIZED,
};
struct cfg80211_event {
@@ -235,6 +237,9 @@ struct cfg80211_event {
u8 bssid[ETH_ALEN];
struct ieee80211_channel *channel;
} ij;
+ struct {
+ u8 bssid[ETH_ALEN];
+ } pa;
};
};
@@ -385,6 +390,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
bool wextev);
void __cfg80211_roamed(struct wireless_dev *wdev,
struct cfg80211_roam_info *info);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid);
int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_autodisconnect_wk(struct work_struct *work);
diff --git a/net/wireless/db.txt b/net/wireless/db.txt
deleted file mode 100644
index a2fc3a0..0000000
--- a/net/wireless/db.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# This file is a placeholder to prevent accidental build breakage if someone
-# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to
-# enable that build option.
-#
-# You should be using CRDA instead. It is even better if you use the CRDA
-# package provided by your distribution, since they will probably keep it
-# up-to-date on your behalf.
-#
-# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will
-# need to replace this file with one containing appropriately formatted
-# regulatory rules that cover the regulatory domains you will be using. Your
-# best option is to extract the db.txt file from the wireless-regdb git
-# repository:
-#
-# git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git
-#
diff --git a/net/wireless/debugfs.h b/net/wireless/debugfs.h
index 74fdd38..a8a135d 100644
--- a/net/wireless/debugfs.h
+++ b/net/wireless/debugfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CFG80211_DEBUGFS_H
#define __CFG80211_DEBUGFS_H
diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c
index e9e9129..a9c0f36 100644
--- a/net/wireless/ethtool.c
+++ b/net/wireless/ethtool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/utsname.h>
#include <net/cfg80211.h>
#include "core.h"
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
deleted file mode 100644
index baf2426..0000000
--- a/net/wireless/genregdb.awk
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/awk -f
-#
-# genregdb.awk -- generate regdb.c from db.txt
-#
-# Actually, it reads from stdin (presumed to be db.txt) and writes
-# to stdout (presumed to be regdb.c), but close enough...
-#
-# Copyright 2009 John W. Linville <linville@tuxdriver.com>
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-BEGIN {
- active = 0
- rules = 0;
- print "/*"
- print " * DO NOT EDIT -- file generated from data in db.txt"
- print " */"
- print ""
- print "#include <linux/nl80211.h>"
- print "#include <net/cfg80211.h>"
- print "#include \"regdb.h\""
- print ""
- regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n"
-}
-
-function parse_country_head() {
- country=$2
- sub(/:/, "", country)
- printf "static const struct ieee80211_regdomain regdom_%s = {\n", country
- printf "\t.alpha2 = \"%s\",\n", country
- if ($NF ~ /DFS-ETSI/)
- printf "\t.dfs_region = NL80211_DFS_ETSI,\n"
- else if ($NF ~ /DFS-FCC/)
- printf "\t.dfs_region = NL80211_DFS_FCC,\n"
- else if ($NF ~ /DFS-JP/)
- printf "\t.dfs_region = NL80211_DFS_JP,\n"
- printf "\t.reg_rules = {\n"
- active = 1
- regdb = regdb "\t&regdom_" country ",\n"
-}
-
-function parse_reg_rule()
-{
- flag_starts_at = 7
-
- start = $1
- sub(/\(/, "", start)
- end = $3
- bw = $5
- sub(/\),/, "", bw)
- gain = 0
- power = $6
- # power might be in mW...
- units = $7
- dfs_cac = 0
-
- sub(/\(/, "", power)
- sub(/\),/, "", power)
- sub(/\),/, "", units)
- sub(/\)/, "", units)
-
- if (units == "mW") {
- flag_starts_at = 8
- power = 10 * log(power)/log(10)
- if ($8 ~ /[[:digit:]]/) {
- flag_starts_at = 9
- dfs_cac = $8
- }
- } else {
- if ($7 ~ /[[:digit:]]/) {
- flag_starts_at = 8
- dfs_cac = $7
- }
- }
- sub(/\(/, "", dfs_cac)
- sub(/\),/, "", dfs_cac)
- flagstr = ""
- for (i=flag_starts_at; i<=NF; i++)
- flagstr = flagstr $i
- split(flagstr, flagarray, ",")
- flags = ""
- for (arg in flagarray) {
- if (flagarray[arg] == "NO-OFDM") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | "
- } else if (flagarray[arg] == "NO-CCK") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | "
- } else if (flagarray[arg] == "NO-INDOOR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | "
- } else if (flagarray[arg] == "NO-OUTDOOR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | "
- } else if (flagarray[arg] == "DFS") {
- flags = flags "\n\t\t\tNL80211_RRF_DFS | "
- } else if (flagarray[arg] == "PTP-ONLY") {
- flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | "
- } else if (flagarray[arg] == "PTMP-ONLY") {
- flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | "
- } else if (flagarray[arg] == "PASSIVE-SCAN") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "NO-IBSS") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "NO-IR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "AUTO-BW") {
- flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
- }
-
- }
- flags = flags "0"
- printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
- rules++
-}
-
-function print_tail_country()
-{
- active = 0
- printf "\t},\n"
- printf "\t.n_reg_rules = %d\n", rules
- printf "};\n\n"
- rules = 0;
-}
-
-/^[ \t]*#/ {
- # Ignore
-}
-
-!active && /^[ \t]*$/ {
- # Ignore
-}
-
-!active && /country/ {
- parse_country_head()
-}
-
-active && /^[ \t]*\(/ {
- parse_reg_rule()
-}
-
-active && /^[ \t]*$/ {
- print_tail_country()
-}
-
-END {
- if (active)
- print_tail_country()
- print regdb "};"
- print ""
- print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);"
-}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 10bf040..413d4f4 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Some IBSS support code for cfg80211.
*
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 421a6b8..51aa556 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ieee80211.h>
#include <linux/export.h>
#include <net/cfg80211.h>
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d8df7a5..e7c64a8 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 MLME SAP interface
*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6908742..fce2cbe 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
};
+/* policy for packet pattern attributes */
+static const struct nla_policy
+nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
+ [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, },
+ [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, },
+ [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
+};
+
static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct cfg80211_registered_device **rdev,
@@ -2122,6 +2130,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
case NL80211_CHAN_HT40MINUS:
cfg80211_chandef_create(chandef, chandef->chan,
chantype);
+ /* user input for center_freq is incorrect */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+ chandef->center_freq1 != nla_get_u32(
+ info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+ return -EINVAL;
+ /* center_freq2 must be zero */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+ nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+ return -EINVAL;
break;
default:
return -EINVAL;
@@ -5669,6 +5686,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
}
}
+static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
+{
+ return reg_reload_regdb();
+}
+
static int nl80211_get_mesh_config(struct sk_buff *skb,
struct genl_info *info)
{
@@ -6610,6 +6632,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
return regulatory_pre_cac_allowed(wdev->wiphy);
}
+static int
+nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+ void *request, struct nlattr **attrs,
+ bool is_sched_scan)
+{
+ u8 *mac_addr, *mac_addr_mask;
+ u32 *flags;
+ enum nl80211_feature_flags randomness_flag;
+
+ if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+ return 0;
+
+ if (is_sched_scan) {
+ struct cfg80211_sched_scan_request *req = request;
+
+ randomness_flag = wdev ?
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ } else {
+ struct cfg80211_scan_request *req = request;
+
+ randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ }
+
+ *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+
+ if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ return -EOPNOTSUPP;
+
+ if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+ int err;
+
+ if (!(wiphy->features & randomness_flag) ||
+ (wdev && wdev->current_bss))
+ return -EOPNOTSUPP;
+
+ err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+ if (err)
+ return err;
+ }
+
+ if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6815,34 +6908,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
}
- if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- info->attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- if (!(wiphy->features &
- NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(info->attrs,
- request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+ false);
+ if (err)
+ goto out_free;
request->no_cck =
nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@ -7290,37 +7359,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
request->ie_len);
}
- if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
-
- if (!wdev) /* must be net-detect */
- flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
-
- if (!(wiphy->features & flg)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev && wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(attrs, request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+ if (err)
+ goto out_free;
if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
request->delay =
@@ -8924,8 +8965,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_USE_MFP]) {
connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+ if (connect.mfp == NL80211_MFP_OPTIONAL &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_MFP_OPTIONAL))
+ return -EOPNOTSUPP;
+
if (connect.mfp != NL80211_MFP_REQUIRED &&
- connect.mfp != NL80211_MFP_NO)
+ connect.mfp != NL80211_MFP_NO &&
+ connect.mfp != NL80211_MFP_OPTIONAL)
return -EINVAL;
} else {
connect.mfp = NL80211_MFP_NO;
@@ -10532,7 +10579,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
u8 *mask_pat;
nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- NULL, info->extack);
+ nl80211_packet_pattern_policy,
+ info->extack);
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10781,7 +10829,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL);
+ nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+ nl80211_packet_pattern_policy, NULL);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -12675,6 +12724,12 @@ static const struct genl_ops nl80211_ops[] = {
.flags = GENL_ADMIN_PERM,
},
{
+ .cmd = NL80211_CMD_RELOAD_REGDB,
+ .doit = nl80211_reload_regdb,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
.cmd = NL80211_CMD_GET_MESH_CONFIG,
.doit = nl80211_get_mesh_config,
.policy = nl80211_policy,
@@ -13802,9 +13857,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
info->req_ie)) ||
(info->resp_ie &&
nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
- info->resp_ie)) ||
- (info->authorized &&
- nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+ info->resp_ie)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13818,6 +13871,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, GFP_KERNEL);
+ return;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
+}
+
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b969333..79e47fe 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_WIRELESS_NL80211_H
#define __NET_WIRELESS_NL80211_H
@@ -58,6 +59,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
struct cfg80211_roam_info *info, gfp_t gfp);
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid);
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap);
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ce23d7d..0c06240 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CFG80211_RDEV_OPS
#define __CFG80211_RDEV_OPS
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6e94f69..3871998 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -53,12 +53,13 @@
#include <linux/ctype.h>
#include <linux/nl80211.h>
#include <linux/platform_device.h>
+#include <linux/verification.h>
#include <linux/moduleparam.h>
+#include <linux/firmware.h>
#include <net/cfg80211.h>
#include "core.h"
#include "reg.h"
#include "rdev-ops.h"
-#include "regdb.h"
#include "nl80211.h"
/*
@@ -100,7 +101,7 @@ static struct regulatory_request core_request_world = {
static struct regulatory_request __rcu *last_request =
(void __force __rcu *)&core_request_world;
-/* To trigger userspace events */
+/* To trigger userspace events and load firmware */
static struct platform_device *reg_pdev;
/*
@@ -443,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
return regd;
}
-#ifdef CONFIG_CFG80211_INTERNAL_REGDB
struct reg_regdb_apply_request {
struct list_head list;
const struct ieee80211_regdomain *regdom;
@@ -475,55 +475,26 @@ static void reg_regdb_apply(struct work_struct *work)
static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
-static int reg_query_builtin(const char *alpha2)
+static int reg_schedule_apply(const struct ieee80211_regdomain *regdom)
{
- const struct ieee80211_regdomain *regdom = NULL;
struct reg_regdb_apply_request *request;
- unsigned int i;
-
- for (i = 0; i < reg_regdb_size; i++) {
- if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
- regdom = reg_regdb[i];
- break;
- }
- }
-
- if (!regdom)
- return -ENODATA;
request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
- if (!request)
- return -ENOMEM;
-
- request->regdom = reg_copy_regd(regdom);
- if (IS_ERR_OR_NULL(request->regdom)) {
- kfree(request);
+ if (!request) {
+ kfree(regdom);
return -ENOMEM;
}
+ request->regdom = regdom;
+
mutex_lock(&reg_regdb_apply_mutex);
list_add_tail(&request->list, &reg_regdb_apply_list);
mutex_unlock(&reg_regdb_apply_mutex);
schedule_work(&reg_regdb_work);
-
return 0;
}
-/* Feel free to add any other sanity checks here */
-static void reg_regdb_size_check(void)
-{
- /* We should ideally BUILD_BUG_ON() but then random builds would fail */
- WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it...");
-}
-#else
-static inline void reg_regdb_size_check(void) {}
-static inline int reg_query_builtin(const char *alpha2)
-{
- return -ENODATA;
-}
-#endif /* CONFIG_CFG80211_INTERNAL_REGDB */
-
#ifdef CONFIG_CFG80211_CRDA_SUPPORT
/* Max number of consecutive attempts to communicate with CRDA */
#define REG_MAX_CRDA_TIMEOUTS 10
@@ -599,10 +570,402 @@ static inline int call_crda(const char *alpha2)
}
#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
+/* code to directly load a firmware database through request_firmware */
+static const struct fwdb_header *regdb;
+
+struct fwdb_country {
+ u8 alpha2[2];
+ __be16 coll_ptr;
+ /* this struct cannot be extended */
+} __packed __aligned(4);
+
+struct fwdb_collection {
+ u8 len;
+ u8 n_rules;
+ u8 dfs_region;
+ /* no optional data yet */
+ /* aligned to 2, then followed by __be16 array of rule pointers */
+} __packed __aligned(4);
+
+enum fwdb_flags {
+ FWDB_FLAG_NO_OFDM = BIT(0),
+ FWDB_FLAG_NO_OUTDOOR = BIT(1),
+ FWDB_FLAG_DFS = BIT(2),
+ FWDB_FLAG_NO_IR = BIT(3),
+ FWDB_FLAG_AUTO_BW = BIT(4),
+};
+
+struct fwdb_rule {
+ u8 len;
+ u8 flags;
+ __be16 max_eirp;
+ __be32 start, end, max_bw;
+ /* start of optional data */
+ __be16 cac_timeout;
+} __packed __aligned(4);
+
+#define FWDB_MAGIC 0x52474442
+#define FWDB_VERSION 20
+
+struct fwdb_header {
+ __be32 magic;
+ __be32 version;
+ struct fwdb_country country[];
+} __packed __aligned(4);
+
+static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
+{
+ struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
+
+ if ((u8 *)rule + sizeof(rule->len) > data + size)
+ return false;
+
+ /* mandatory fields */
+ if (rule->len < offsetofend(struct fwdb_rule, max_bw))
+ return false;
+
+ return true;
+}
+
+static bool valid_country(const u8 *data, unsigned int size,
+ const struct fwdb_country *country)
+{
+ unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+ struct fwdb_collection *coll = (void *)(data + ptr);
+ __be16 *rules_ptr;
+ unsigned int i;
+
+ /* make sure we can read len/n_rules */
+ if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size)
+ return false;
+
+ /* make sure base struct and all rules fit */
+ if ((u8 *)coll + ALIGN(coll->len, 2) +
+ (coll->n_rules * 2) > data + size)
+ return false;
+
+ /* mandatory fields must exist */
+ if (coll->len < offsetofend(struct fwdb_collection, dfs_region))
+ return false;
+
+ rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+
+ for (i = 0; i < coll->n_rules; i++) {
+ u16 rule_ptr = be16_to_cpu(rules_ptr[i]);
+
+ if (!valid_rule(data, size, rule_ptr))
+ return false;
+ }
+
+ return true;
+}
+
+#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
+static struct key *builtin_regdb_keys;
+
+static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
+{
+ const u8 *end = p + buflen;
+ size_t plen;
+ key_ref_t key;
+
+ while (p < end) {
+ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+ * than 256 bytes in size.
+ */
+ if (end - p < 4)
+ goto dodgy_cert;
+ if (p[0] != 0x30 &&
+ p[1] != 0x82)
+ goto dodgy_cert;
+ plen = (p[2] << 8) | p[3];
+ plen += 4;
+ if (plen > end - p)
+ goto dodgy_cert;
+
+ key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
+ "asymmetric", NULL, p, plen,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_BUILT_IN |
+ KEY_ALLOC_BYPASS_RESTRICTION);
+ if (IS_ERR(key)) {
+ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+ PTR_ERR(key));
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
+ }
+ p += plen;
+ }
+
+ return;
+
+dodgy_cert:
+ pr_err("Problem parsing in-kernel X.509 certificate list\n");
+}
+
+static int __init load_builtin_regdb_keys(void)
+{
+ builtin_regdb_keys =
+ keyring_alloc(".builtin_regdb_keys",
+ KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+ if (IS_ERR(builtin_regdb_keys))
+ return PTR_ERR(builtin_regdb_keys);
+
+ pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
+
+#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
+ load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
+#endif
+#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
+ if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
+ load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
+#endif
+
+ return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+ const struct firmware *sig;
+ bool result;
+
+ if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev))
+ return false;
+
+ result = verify_pkcs7_signature(data, size, sig->data, sig->size,
+ builtin_regdb_keys,
+ VERIFYING_UNSPECIFIED_SIGNATURE,
+ NULL, NULL) == 0;
+
+ release_firmware(sig);
+
+ return result;
+}
+
+static void free_regdb_keyring(void)
+{
+ key_put(builtin_regdb_keys);
+}
+#else
+static int load_builtin_regdb_keys(void)
+{
+ return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+ return true;
+}
+
+static void free_regdb_keyring(void)
+{
+}
+#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */
+
+static bool valid_regdb(const u8 *data, unsigned int size)
+{
+ const struct fwdb_header *hdr = (void *)data;
+ const struct fwdb_country *country;
+
+ if (size < sizeof(*hdr))
+ return false;
+
+ if (hdr->magic != cpu_to_be32(FWDB_MAGIC))
+ return false;
+
+ if (hdr->version != cpu_to_be32(FWDB_VERSION))
+ return false;
+
+ if (!regdb_has_valid_signature(data, size))
+ return false;
+
+ country = &hdr->country[0];
+ while ((u8 *)(country + 1) <= data + size) {
+ if (!country->coll_ptr)
+ break;
+ if (!valid_country(data, size, country))
+ return false;
+ country++;
+ }
+
+ return true;
+}
+
+static int regdb_query_country(const struct fwdb_header *db,
+ const struct fwdb_country *country)
+{
+ unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+ struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
+ struct ieee80211_regdomain *regdom;
+ unsigned int size_of_regd;
+ unsigned int i;
+
+ size_of_regd =
+ sizeof(struct ieee80211_regdomain) +
+ coll->n_rules * sizeof(struct ieee80211_reg_rule);
+
+ regdom = kzalloc(size_of_regd, GFP_KERNEL);
+ if (!regdom)
+ return -ENOMEM;
+
+ regdom->n_reg_rules = coll->n_rules;
+ regdom->alpha2[0] = country->alpha2[0];
+ regdom->alpha2[1] = country->alpha2[1];
+ regdom->dfs_region = coll->dfs_region;
+
+ for (i = 0; i < regdom->n_reg_rules; i++) {
+ __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+ unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
+ struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
+ struct ieee80211_reg_rule *rrule = &regdom->reg_rules[i];
+
+ rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start);
+ rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end);
+ rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw);
+
+ rrule->power_rule.max_antenna_gain = 0;
+ rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp);
+
+ rrule->flags = 0;
+ if (rule->flags & FWDB_FLAG_NO_OFDM)
+ rrule->flags |= NL80211_RRF_NO_OFDM;
+ if (rule->flags & FWDB_FLAG_NO_OUTDOOR)
+ rrule->flags |= NL80211_RRF_NO_OUTDOOR;
+ if (rule->flags & FWDB_FLAG_DFS)
+ rrule->flags |= NL80211_RRF_DFS;
+ if (rule->flags & FWDB_FLAG_NO_IR)
+ rrule->flags |= NL80211_RRF_NO_IR;
+ if (rule->flags & FWDB_FLAG_AUTO_BW)
+ rrule->flags |= NL80211_RRF_AUTO_BW;
+
+ rrule->dfs_cac_ms = 0;
+
+ /* handle optional data */
+ if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
+ rrule->dfs_cac_ms =
+ 1000 * be16_to_cpu(rule->cac_timeout);
+ }
+
+ return reg_schedule_apply(regdom);
+}
+
+static int query_regdb(const char *alpha2)
+{
+ const struct fwdb_header *hdr = regdb;
+ const struct fwdb_country *country;
+
+ ASSERT_RTNL();
+
+ if (IS_ERR(regdb))
+ return PTR_ERR(regdb);
+
+ country = &hdr->country[0];
+ while (country->coll_ptr) {
+ if (alpha2_equal(alpha2, country->alpha2))
+ return regdb_query_country(regdb, country);
+ country++;
+ }
+
+ return -ENODATA;
+}
+
+static void regdb_fw_cb(const struct firmware *fw, void *context)
+{
+ int set_error = 0;
+ bool restore = true;
+ void *db;
+
+ if (!fw) {
+ pr_info("failed to load regulatory.db\n");
+ set_error = -ENODATA;
+ } else if (!valid_regdb(fw->data, fw->size)) {
+ pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n");
+ set_error = -EINVAL;
+ }
+
+ rtnl_lock();
+ if (WARN_ON(regdb && !IS_ERR(regdb))) {
+ /* just restore and free new db */
+ } else if (set_error) {
+ regdb = ERR_PTR(set_error);
+ } else if (fw) {
+ db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+ if (db) {
+ regdb = db;
+ restore = context && query_regdb(context);
+ } else {
+ restore = true;
+ }
+ }
+
+ if (restore)
+ restore_regulatory_settings(true);
+
+ rtnl_unlock();
+
+ kfree(context);
+
+ release_firmware(fw);
+}
+
+static int query_regdb_file(const char *alpha2)
+{
+ ASSERT_RTNL();
+
+ if (regdb)
+ return query_regdb(alpha2);
+
+ alpha2 = kmemdup(alpha2, 2, GFP_KERNEL);
+ if (!alpha2)
+ return -ENOMEM;
+
+ return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+ &reg_pdev->dev, GFP_KERNEL,
+ (void *)alpha2, regdb_fw_cb);
+}
+
+int reg_reload_regdb(void)
+{
+ const struct firmware *fw;
+ void *db;
+ int err;
+
+ err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev);
+ if (err)
+ return err;
+
+ if (!valid_regdb(fw->data, fw->size)) {
+ err = -ENODATA;
+ goto out;
+ }
+
+ db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+ if (!db) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rtnl_lock();
+ if (!IS_ERR_OR_NULL(regdb))
+ kfree(regdb);
+ regdb = db;
+ rtnl_unlock();
+
+ out:
+ release_firmware(fw);
+ return err;
+}
+
static bool reg_query_database(struct regulatory_request *request)
{
- /* query internal regulatory database (if it exists) */
- if (reg_query_builtin(request->alpha2) == 0)
+ if (query_regdb_file(request->alpha2) == 0)
return true;
if (call_crda(request->alpha2) == 0)
@@ -3285,6 +3648,10 @@ int __init regulatory_init(void)
{
int err = 0;
+ err = load_builtin_regdb_keys();
+ if (err)
+ return err;
+
reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
if (IS_ERR(reg_pdev))
return PTR_ERR(reg_pdev);
@@ -3293,8 +3660,6 @@ int __init regulatory_init(void)
spin_lock_init(&reg_pending_beacons_lock);
spin_lock_init(&reg_indoor_lock);
- reg_regdb_size_check();
-
rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
user_alpha2[0] = '9';
@@ -3360,4 +3725,9 @@ void regulatory_exit(void)
list_del(&reg_request->list);
kfree(reg_request);
}
+
+ if (!IS_ERR_OR_NULL(regdb))
+ kfree(regdb);
+
+ free_regdb_keyring();
}
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index ca7fedf..9ceeb5f 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,5 +1,8 @@
#ifndef __NET_WIRELESS_REG_H
#define __NET_WIRELESS_REG_H
+
+#include <net/cfg80211.h>
+
/*
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
*
@@ -179,4 +182,15 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
* @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
*/
bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
+
+/**
+ * reg_reload_regdb - reload the regulatory.db firmware file
+ */
+int reg_reload_regdb(void);
+
+extern const u8 shipped_regdb_certs[];
+extern unsigned int shipped_regdb_certs_len;
+extern const u8 extra_regdb_certs[];
+extern unsigned int extra_regdb_certs_len;
+
#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h
deleted file mode 100644
index 3279cfc..0000000
--- a/net/wireless/regdb.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __REGDB_H__
-#define __REGDB_H__
-
-/*
- * Copyright 2009 John W. Linville <linville@tuxdriver.com>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-extern const struct ieee80211_regdomain *reg_regdb[];
-extern int reg_regdb_size;
-
-#endif /* __REGDB_H__ */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 9f0901f..f6c5fe48 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 scan result handling
*
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0a49b88..fdb3646 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SME code for cfg80211
* both driver SME event handling and the SME implementation
@@ -522,11 +523,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
return -EOPNOTSUPP;
if (wdev->current_bss) {
- if (!prev_bssid)
- return -EALREADY;
- if (prev_bssid &&
- !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
- return -ENOTCONN;
cfg80211_unhold_bss(wdev->current_bss);
cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
wdev->current_bss = NULL;
@@ -960,7 +956,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
ev->rm.resp_ie_len = info->resp_ie_len;
memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
ev->rm.bss = info->bss;
- ev->rm.authorized = info->authorized;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
@@ -969,6 +964,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
}
EXPORT_SYMBOL(cfg80211_roamed);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
+{
+ ASSERT_WDEV_LOCK(wdev);
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+ return;
+
+ if (WARN_ON(!wdev->current_bss) ||
+ WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+ return;
+
+ nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
+ bssid);
+}
+
+void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+ gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_event *ev;
+ unsigned long flags;
+
+ if (WARN_ON(!bssid))
+ return;
+
+ ev = kzalloc(sizeof(*ev), gfp);
+ if (!ev)
+ return;
+
+ ev->type = EVENT_PORT_AUTHORIZED;
+ memcpy(ev->pa.bssid, bssid, ETH_ALEN);
+
+ /*
+ * Use the wdev event list so that if there are pending
+ * connected/roamed events, they will be reported first.
+ */
+ spin_lock_irqsave(&wdev->event_lock, flags);
+ list_add_tail(&ev->list, &wdev->event_list);
+ spin_unlock_irqrestore(&wdev->event_lock, flags);
+ queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_port_authorized);
+
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap)
{
@@ -1063,11 +1102,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
- if (WARN_ON(wdev->connect_keys)) {
- kzfree(wdev->connect_keys);
- wdev->connect_keys = NULL;
+ /*
+ * If we have an ssid_len, we're trying to connect or are
+ * already connected, so reject a new SSID unless it's the
+ * same (which is the case for re-association.)
+ */
+ if (wdev->ssid_len &&
+ (wdev->ssid_len != connect->ssid_len ||
+ memcmp(wdev->ssid, connect->ssid, wdev->ssid_len)))
+ return -EALREADY;
+
+ /*
+ * If connected, reject (re-)association unless prev_bssid
+ * matches the current BSSID.
+ */
+ if (wdev->current_bss) {
+ if (!prev_bssid)
+ return -EALREADY;
+ if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
+ return -ENOTCONN;
}
+ /*
+ * Reject if we're in the process of connecting with WEP,
+ * this case isn't very interesting and trying to handle
+ * it would make the code much more complex.
+ */
+ if (wdev->connect_keys)
+ return -EINPROGRESS;
+
cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
@@ -1118,7 +1181,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
if (err) {
wdev->connect_keys = NULL;
- wdev->ssid_len = 0;
+ /*
+ * This could be reassoc getting refused, don't clear
+ * ssid_len in that case.
+ */
+ if (!wdev->current_bss)
+ wdev->ssid_len = 0;
return err;
}
@@ -1145,6 +1213,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
else if (wdev->ssid_len)
err = rdev_disconnect(rdev, dev, reason);
+ /*
+ * Clear ssid_len unless we actually were fully connected,
+ * in which case cfg80211_disconnected() will take care of
+ * this later.
+ */
+ if (!wdev->current_bss)
+ wdev->ssid_len = 0;
+
return err;
}
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
index b533ed7..7b454c2 100644
--- a/net/wireless/sysfs.h
+++ b/net/wireless/sysfs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __WIRELESS_SYSFS_H
#define __WIRELESS_SYSFS_H
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 0f8db41..f3353fe 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cfg80211
diff --git a/net/wireless/util.c b/net/wireless/util.c
index bcb1284..c691606 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Wireless utility functions
*
@@ -157,32 +158,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
case NL80211_BAND_2GHZ:
want = 7;
for (i = 0; i < sband->n_bitrates; i++) {
- if (sband->bitrates[i].bitrate == 10) {
+ switch (sband->bitrates[i].bitrate) {
+ case 10:
+ case 20:
+ case 55:
+ case 110:
sband->bitrates[i].flags |=
IEEE80211_RATE_MANDATORY_B |
IEEE80211_RATE_MANDATORY_G;
want--;
- }
-
- if (sband->bitrates[i].bitrate == 20 ||
- sband->bitrates[i].bitrate == 55 ||
- sband->bitrates[i].bitrate == 110 ||
- sband->bitrates[i].bitrate == 60 ||
- sband->bitrates[i].bitrate == 120 ||
- sband->bitrates[i].bitrate == 240) {
+ break;
+ case 60:
+ case 120:
+ case 240:
sband->bitrates[i].flags |=
IEEE80211_RATE_MANDATORY_G;
want--;
- }
-
- if (sband->bitrates[i].bitrate != 10 &&
- sband->bitrates[i].bitrate != 20 &&
- sband->bitrates[i].bitrate != 55 &&
- sband->bitrates[i].bitrate != 110)
+ /* fall through */
+ default:
sband->bitrates[i].flags |=
IEEE80211_RATE_ERP_G;
+ break;
+ }
}
- WARN_ON(want != 0 && want != 3 && want != 6);
+ WARN_ON(want != 0 && want != 3);
break;
case NL80211_BAND_60GHZ:
/* check for mandatory HT MCS 1..4 */
@@ -529,121 +528,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
}
EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
-int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype,
- const u8 *bssid, bool qos)
-{
- struct ieee80211_hdr hdr;
- u16 hdrlen, ethertype;
- __le16 fc;
- const u8 *encaps_data;
- int encaps_len, skip_header_bytes;
- int nh_pos, h_pos;
- int head_need;
-
- if (unlikely(skb->len < ETH_HLEN))
- return -EINVAL;
-
- nh_pos = skb_network_header(skb) - skb->data;
- h_pos = skb_transport_header(skb) - skb->data;
-
- /* convert Ethernet header to proper 802.11 header (based on
- * operation mode) */
- ethertype = (skb->data[12] << 8) | skb->data[13];
- fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
-
- switch (iftype) {
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_P2P_GO:
- fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
- /* DA BSSID SA */
- memcpy(hdr.addr1, skb->data, ETH_ALEN);
- memcpy(hdr.addr2, addr, ETH_ALEN);
- memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
- hdrlen = 24;
- break;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
- /* BSSID SA DA */
- memcpy(hdr.addr1, bssid, ETH_ALEN);
- memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
- memcpy(hdr.addr3, skb->data, ETH_ALEN);
- hdrlen = 24;
- break;
- case NL80211_IFTYPE_OCB:
- case NL80211_IFTYPE_ADHOC:
- /* DA SA BSSID */
- memcpy(hdr.addr1, skb->data, ETH_ALEN);
- memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
- memcpy(hdr.addr3, bssid, ETH_ALEN);
- hdrlen = 24;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- if (qos) {
- fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
- hdrlen += 2;
- }
-
- hdr.frame_control = fc;
- hdr.duration_id = 0;
- hdr.seq_ctrl = 0;
-
- skip_header_bytes = ETH_HLEN;
- if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
- encaps_data = bridge_tunnel_header;
- encaps_len = sizeof(bridge_tunnel_header);
- skip_header_bytes -= 2;
- } else if (ethertype >= ETH_P_802_3_MIN) {
- encaps_data = rfc1042_header;
- encaps_len = sizeof(rfc1042_header);
- skip_header_bytes -= 2;
- } else {
- encaps_data = NULL;
- encaps_len = 0;
- }
-
- skb_pull(skb, skip_header_bytes);
- nh_pos -= skip_header_bytes;
- h_pos -= skip_header_bytes;
-
- head_need = hdrlen + encaps_len - skb_headroom(skb);
-
- if (head_need > 0 || skb_cloned(skb)) {
- head_need = max(head_need, 0);
- if (head_need)
- skb_orphan(skb);
-
- if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC))
- return -ENOMEM;
- }
-
- if (encaps_data) {
- memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
- nh_pos += encaps_len;
- h_pos += encaps_len;
- }
-
- memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
-
- nh_pos += hdrlen;
- h_pos += hdrlen;
-
- /* Update skb pointers to various headers since this modified frame
- * is going to go through Linux networking code that may potentially
- * need things like pointer to IP header. */
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, nh_pos);
- skb_set_transport_header(skb, h_pos);
-
- return 0;
-}
-EXPORT_SYMBOL(ieee80211_data_from_8023);
-
static void
__frame_add_frag(struct sk_buff *skb, struct page *page,
void *ptr, int len, int size)
@@ -963,6 +847,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
case EVENT_STOPPED:
__cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
break;
+ case EVENT_PORT_AUTHORIZED:
+ __cfg80211_port_authorized(wdev, ev->pa.bssid);
+ break;
}
wdev_unlock(wdev);
@@ -1367,13 +1254,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
}
EXPORT_SYMBOL(cfg80211_get_p2p_attr);
-static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
+static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext)
{
int i;
- for (i = 0; i < n_ids; i++)
- if (ids[i] == id)
+ /* Make sure array values are legal */
+ if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION))
+ return false;
+
+ i = 0;
+ while (i < n_ids) {
+ if (ids[i] == WLAN_EID_EXTENSION) {
+ if (id_ext && (ids[i + 1] == id))
+ return true;
+
+ i += 2;
+ continue;
+ }
+
+ if (ids[i] == id && !id_ext)
return true;
+
+ i++;
+ }
return false;
}
@@ -1403,14 +1306,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
{
size_t pos = offset;
- while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
+ while (pos < ielen) {
+ u8 ext = 0;
+
+ if (ies[pos] == WLAN_EID_EXTENSION)
+ ext = 2;
+ if ((pos + ext) >= ielen)
+ break;
+
+ if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext],
+ ies[pos] == WLAN_EID_EXTENSION))
+ break;
+
if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
pos = skip_ie(ies, ielen, pos);
- while (pos < ielen &&
- !ieee80211_id_in_list(after_ric, n_after_ric,
- ies[pos]))
- pos = skip_ie(ies, ielen, pos);
+ while (pos < ielen) {
+ if (ies[pos] == WLAN_EID_EXTENSION)
+ ext = 2;
+ else
+ ext = 0;
+
+ if ((pos + ext) >= ielen)
+ break;
+
+ if (!ieee80211_id_in_list(after_ric,
+ n_after_ric,
+ ies[pos + ext],
+ ext == 2))
+ pos = skip_ie(ies, ielen, pos);
+ }
} else {
pos = skip_ie(ies, ielen, pos);
}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 5d4a02c7..7ca04a7 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 - wext compat code
*
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c434f19..c67d7a8 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* cfg80211 wext compat for managed mode.
*
diff --git a/net/x25/Makefile b/net/x25/Makefile
index a2c34ab..5dd544a 100644
--- a/net/x25/Makefile
+++ b/net/x25/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Linux X.25 Packet layer.
#
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ac09593..ea87143 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *);
/*
* handler for deferred kills.
*/
-static void x25_destroy_timer(unsigned long data)
+static void x25_destroy_timer(struct timer_list *t)
{
- x25_destroy_socket_from_timer((struct sock *)data);
+ struct sock *sk = from_timer(sk, t, sk_timer);
+
+ x25_destroy_socket_from_timer(sk);
}
/*
@@ -413,8 +415,7 @@ static void __x25_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
sk->sk_timer.expires = jiffies + 10 * HZ;
- sk->sk_timer.function = x25_destroy_timer;
- sk->sk_timer.data = (unsigned long)sk;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_destroy_timer;
add_timer(&sk->sk_timer);
} else {
/* drop last reference so sock_put will free */
diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c
index ba078c8..e9802af 100644
--- a/net/x25/sysctl_net_x25.c
+++ b/net/x25/sysctl_net_x25.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
* sysctl_net_x25.c: sysctl interface to net X.25 subsystem.
*
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 997ff7b..ad1734d 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
*vc_fac_mask |= X25_MASK_REVERSE;
break;
}
-
+ /*fall through */
case X25_FAC_THROUGHPUT:
facilities->throughput = p[1];
*vc_fac_mask |= X25_MASK_THROUGHPUT;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 7ac5009..3c12cae 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
case X25_RESET_REQUEST:
x25_write_internal(sk, X25_RESET_CONFIRMATION);
+ /* fall through */
case X25_RESET_CONFIRMATION: {
x25_stop_timer(sk);
x25->condition = 0x00;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 5c5db1a..1dfba3c 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -26,18 +26,17 @@
#include <net/tcp_states.h>
#include <net/x25.h>
-static void x25_heartbeat_expiry(unsigned long);
-static void x25_timer_expiry(unsigned long);
+static void x25_heartbeat_expiry(struct timer_list *t);
+static void x25_timer_expiry(struct timer_list *t);
void x25_init_timers(struct sock *sk)
{
struct x25_sock *x25 = x25_sk(sk);
- setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
+ timer_setup(&x25->timer, x25_timer_expiry, 0);
/* initialized by sock_init_data */
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &x25_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_heartbeat_expiry;
}
void x25_start_heartbeat(struct sock *sk)
@@ -93,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk)
return x25->timer.expires - jiffies;
}
-static void x25_heartbeat_expiry(unsigned long param)
+static void x25_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */
@@ -160,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk)
}
}
-static void x25_timer_expiry(unsigned long param)
+static void x25_timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct x25_sock *x25 = from_timer(x25, t, timer);
+ struct sock *sk = &x25->sk;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 55b2ac3..0bd2465 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the XFRM subsystem.
#
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index acf0010..30e5746 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -91,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
}
if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) {
+ xso->dev = NULL;
dev_put(dev);
return 0;
}
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 1e98bc0..2ad33ce 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* xfrm_hash.c: Common hash table code.
*
* Copyright (C) 2006 David S. Miller (davem@davemloft.net)
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index eaea9c4..61be810 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _XFRM_HASH_H
#define _XFRM_HASH_H
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 2515cd2..82d20ee 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* xfrm_input.c
*
@@ -429,7 +430,8 @@ resume:
nf_reset(skb);
if (decaps) {
- skb->sp->olen = 0;
+ if (skb->sp)
+ skb->sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return 0;
@@ -440,7 +442,8 @@ resume:
err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async);
if (xfrm_gro) {
- skb->sp->olen = 0;
+ if (skb->sp)
+ skb->sp->olen = 0;
skb_dst_drop(skb);
gro_cells_receive(&gro_cells, skb);
return err;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 31a2e6d..73ad8c8 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -105,6 +105,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
if (xfrm_offload(skb)) {
x->type_offload->encap(x, skb);
} else {
+ /* Inner headers are invalid now. */
+ skb->encapsulation = 0;
+
err = x->type->output(x, skb);
if (err == -EINPROGRESS)
goto out;
@@ -208,7 +211,6 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
int err;
secpath_reset(skb);
- skb->encapsulation = 0;
if (xfrm_dev_offload_ok(skb, x)) {
struct sec_path *sp;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f062539..f02b174 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -57,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation;
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
-static void xfrm_policy_queue_process(unsigned long arg);
+static void xfrm_policy_queue_process(struct timer_list *t);
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -179,9 +179,9 @@ static inline unsigned long make_jiffies(long secs)
return secs*HZ;
}
-static void xfrm_policy_timer(unsigned long data)
+static void xfrm_policy_timer(struct timer_list *t)
{
- struct xfrm_policy *xp = (struct xfrm_policy *)data;
+ struct xfrm_policy *xp = from_timer(xp, t, timer);
unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
@@ -267,10 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
rwlock_init(&policy->lock);
refcount_set(&policy->refcnt, 1);
skb_queue_head_init(&policy->polq.hold_queue);
- setup_timer(&policy->timer, xfrm_policy_timer,
- (unsigned long)policy);
- setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
- (unsigned long)policy);
+ timer_setup(&policy->timer, xfrm_policy_timer, 0);
+ timer_setup(&policy->polq.hold_timer,
+ xfrm_policy_queue_process, 0);
}
return policy;
}
@@ -1573,6 +1572,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
goto put_states;
}
+ if (!dst_prev)
+ dst0 = dst1;
+ else
+ /* Ref count is taken during xfrm_alloc_dst()
+ * No need to do dst_clone() on dst1
+ */
+ dst_prev->child = dst1;
+
if (xfrm[i]->sel.family == AF_UNSPEC) {
inner_mode = xfrm_ip2inner_mode(xfrm[i],
xfrm_af2proto(family));
@@ -1584,14 +1591,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
} else
inner_mode = xfrm[i]->inner_mode;
- if (!dst_prev)
- dst0 = dst1;
- else
- /* Ref count is taken during xfrm_alloc_dst()
- * No need to do dst_clone() on dst1
- */
- dst_prev->child = dst1;
-
xdst->route = dst;
dst_copy_metrics(dst1, dst);
@@ -1852,12 +1851,12 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
return xdst;
}
-static void xfrm_policy_queue_process(unsigned long arg)
+static void xfrm_policy_queue_process(struct timer_list *t)
{
struct sk_buff *skb;
struct sock *sk;
struct dst_entry *dst;
- struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+ struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
struct net *net = xp_net(pol);
struct xfrm_policy_queue *pq = &pol->polq;
struct flowi fl;
@@ -2076,7 +2075,6 @@ make_dummy_bundle:
xdst->num_xfrms = num_xfrms;
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
- dst_hold(&xdst->u.dst);
return xdst;
inc_error:
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 0dab1cd..1f5cee2 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -732,12 +732,12 @@ restart:
}
}
}
+out:
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
if (cnt) {
err = 0;
xfrm_policy_cache_flush();
}
-out:
- spin_unlock_bh(&net->xfrm.xfrm_state_lock);
return err;
}
EXPORT_SYMBOL(xfrm_state_flush);
@@ -2069,6 +2069,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
if (err >= 0) {
xfrm_sk_policy_insert(sk, err, pol);
xfrm_pol_put(pol);
+ __sk_dst_reset(sk);
err = 0;
}
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 35a7e79..0c6c5ef 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2bfbd91..983b023 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < xfrm_alg_len(algp))
+ if (nla_len(rt) < (int)xfrm_alg_len(algp))
return -EINVAL;
switch (type) {
@@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < xfrm_alg_auth_len(algp))
+ if (nla_len(rt) < (int)xfrm_alg_auth_len(algp))
return -EINVAL;
algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs)
return 0;
algp = nla_data(rt);
- if (nla_len(rt) < aead_len(algp))
+ if (nla_len(rt) < (int)aead_len(algp))
return -EINVAL;
algp->alg_name[sizeof(algp->alg_name) - 1] = '\0';
@@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
return -EINVAL;
- if (nla_len(rt) < xfrm_replay_state_esn_len(rs) &&
+ if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) &&
nla_len(rt) != sizeof(*rs))
return -EINVAL;
}
@@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
struct nlattr *rp)
{
struct xfrm_replay_state_esn *up;
- int ulen;
+ unsigned int ulen;
if (!replay_esn || !rp)
return 0;
@@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
/* Check the overall length and the internal bitmap length to avoid
* potential overflow. */
- if (nla_len(rp) < ulen ||
+ if (nla_len(rp) < (int)ulen ||
xfrm_replay_state_esn_len(replay_esn) != ulen ||
replay_esn->bmp_len != up->bmp_len)
return -EINVAL;
@@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
struct nlattr *rta)
{
struct xfrm_replay_state_esn *p, *pp, *up;
- int klen, ulen;
+ unsigned int klen, ulen;
if (!rta)
return 0;
up = nla_data(rta);
klen = xfrm_replay_state_esn_len(up);
- ulen = nla_len(rta) >= klen ? klen : sizeof(*up);
+ ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up);
p = kzalloc(klen, GFP_KERNEL);
if (!p)
@@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
return 0;
}
-static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
+static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx)
{
- int len = 0;
+ unsigned int len = 0;
if (xfrm_ctx) {
len += sizeof(struct xfrm_user_sec_ctx);
@@ -657,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0) {
x->km.state = XFRM_STATE_DEAD;
+ xfrm_dev_state_delete(x);
__xfrm_state_put(x);
goto out;
}
@@ -1031,7 +1032,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb,
return -1;
}
-static inline size_t xfrm_spdinfo_msgsize(void)
+static inline unsigned int xfrm_spdinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
+ nla_total_size(sizeof(struct xfrmu_spdinfo))
@@ -1146,18 +1147,19 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 *flags = nlmsg_data(nlh);
u32 sportid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
+ int err;
r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);
if (r_skb == NULL)
return -ENOMEM;
- if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0)
- BUG();
+ err = build_spdinfo(r_skb, net, sportid, seq, *flags);
+ BUG_ON(err < 0);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
}
-static inline size_t xfrm_sadinfo_msgsize(void)
+static inline unsigned int xfrm_sadinfo_msgsize(void)
{
return NLMSG_ALIGN(4)
+ nla_total_size(sizeof(struct xfrmu_sadhinfo))
@@ -1204,13 +1206,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 *flags = nlmsg_data(nlh);
u32 sportid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
+ int err;
r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);
if (r_skb == NULL)
return -ENOMEM;
- if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0)
- BUG();
+ err = build_sadinfo(r_skb, net, sportid, seq, *flags);
+ BUG_ON(err < 0);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
}
@@ -1633,7 +1636,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s
return copy_sec_ctx(xp->security, skb);
return 0;
}
-static inline size_t userpolicy_type_attrsize(void)
+static inline unsigned int userpolicy_type_attrsize(void)
{
#ifdef CONFIG_XFRM_SUB_POLICY
return nla_total_size(sizeof(struct xfrm_userpolicy_type));
@@ -1692,32 +1695,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
static int xfrm_dump_policy_done(struct netlink_callback *cb)
{
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct net *net = sock_net(cb->skb->sk);
xfrm_policy_walk_done(walk, net);
return 0;
}
+static int xfrm_dump_policy_start(struct netlink_callback *cb)
+{
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+
+ BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
+
+ xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
+ return 0;
+}
+
static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct xfrm_dump_info info;
- BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) >
- sizeof(cb->args) - sizeof(cb->args[0]));
-
info.in_skb = cb->skb;
info.out_skb = skb;
info.nlmsg_seq = cb->nlh->nlmsg_seq;
info.nlmsg_flags = NLM_F_MULTI;
- if (!cb->args[0]) {
- cb->args[0] = 1;
- xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
- }
-
(void) xfrm_policy_walk(net, walk, dump_one_policy, &info);
return skb->len;
@@ -1850,9 +1855,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
-static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x)
+static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x)
{
- size_t replay_size = x->replay_esn ?
+ unsigned int replay_size = x->replay_esn ?
xfrm_replay_state_esn_len(x->replay_esn) :
sizeof(struct xfrm_replay_state);
@@ -1957,8 +1962,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
c.seq = nlh->nlmsg_seq;
c.portid = nlh->nlmsg_pid;
- if (build_aevent(r_skb, x, &c) < 0)
- BUG();
+ err = build_aevent(r_skb, x, &c);
+ BUG_ON(err < 0);
+
err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
spin_unlock_bh(&x->lock);
xfrm_state_put(x);
@@ -2321,8 +2327,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff
return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk);
}
-static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma,
- int with_encp)
+static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma,
+ int with_encp)
{
return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id))
+ (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0)
@@ -2385,6 +2391,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
{
struct net *net = &init_net;
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap),
GFP_ATOMIC);
@@ -2392,8 +2399,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
return -ENOMEM;
/* build migrate */
- if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0)
- BUG();
+ err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE);
}
@@ -2473,6 +2480,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+ int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
const struct nla_policy *nla_pol;
@@ -2486,6 +2494,7 @@ static const struct xfrm_link {
[XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy },
[XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy },
[XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
+ .start = xfrm_dump_policy_start,
.dump = xfrm_dump_policy,
.done = xfrm_dump_policy_done },
[XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
@@ -2538,6 +2547,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct netlink_dump_control c = {
+ .start = link->start,
.dump = link->dump,
.done = link->done,
};
@@ -2566,7 +2576,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb)
mutex_unlock(&net->xfrm.xfrm_cfg_mutex);
}
-static inline size_t xfrm_expire_msgsize(void)
+static inline unsigned int xfrm_expire_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_expire))
+ nla_total_size(sizeof(struct xfrm_mark));
@@ -2617,13 +2627,14 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event
{
struct net *net = xs_net(x);
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_aevent(skb, x, c) < 0)
- BUG();
+ err = build_aevent(skb, x, c);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS);
}
@@ -2654,9 +2665,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA);
}
-static inline size_t xfrm_sa_len(struct xfrm_state *x)
+static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
{
- size_t l = 0;
+ unsigned int l = 0;
if (x->aead)
l += nla_total_size(aead_len(x->aead));
if (x->aalg) {
@@ -2701,8 +2712,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
struct xfrm_usersa_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- int len = xfrm_sa_len(x);
- int headlen, err;
+ unsigned int len = xfrm_sa_len(x);
+ unsigned int headlen;
+ int err;
headlen = sizeof(*p);
if (c->event == XFRM_MSG_DELSA) {
@@ -2776,8 +2788,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c
}
-static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
- struct xfrm_policy *xp)
+static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x,
+ struct xfrm_policy *xp)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire))
+ nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2829,13 +2841,14 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
{
struct net *net = xs_net(x);
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_acquire(skb, x, xt, xp) < 0)
- BUG();
+ err = build_acquire(skb, x, xt, xp);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE);
}
@@ -2900,7 +2913,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
return xp;
}
-static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp)
+static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire))
+ nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr)
@@ -2944,26 +2957,28 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct
{
struct net *net = xp_net(xp);
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_polexpire(skb, xp, dir, c) < 0)
- BUG();
+ err = build_polexpire(skb, xp, dir, c);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE);
}
static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c)
{
- int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
+ unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
struct net *net = xp_net(xp);
struct xfrm_userpolicy_info *p;
struct xfrm_userpolicy_id *id;
struct nlmsghdr *nlh;
struct sk_buff *skb;
- int headlen, err;
+ unsigned int headlen;
+ int err;
headlen = sizeof(*p);
if (c->event == XFRM_MSG_DELPOLICY) {
@@ -3070,7 +3085,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct
}
-static inline size_t xfrm_report_msgsize(void)
+static inline unsigned int xfrm_report_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_report));
}
@@ -3104,18 +3119,19 @@ static int xfrm_send_report(struct net *net, u8 proto,
struct xfrm_selector *sel, xfrm_address_t *addr)
{
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
- if (build_report(skb, proto, sel, addr) < 0)
- BUG();
+ err = build_report(skb, proto, sel, addr);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT);
}
-static inline size_t xfrm_mapping_msgsize(void)
+static inline unsigned int xfrm_mapping_msgsize(void)
{
return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping));
}
@@ -3151,6 +3167,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
{
struct net *net = xs_net(x);
struct sk_buff *skb;
+ int err;
if (x->id.proto != IPPROTO_ESP)
return -EINVAL;
@@ -3162,8 +3179,8 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
if (skb == NULL)
return -ENOMEM;
- if (build_mapping(skb, x, ipaddr, sport) < 0)
- BUG();
+ err = build_mapping(skb, x, ipaddr, sport);
+ BUG_ON(err < 0);
return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING);
}
OpenPOWER on IntegriCloud