author     Linus Torvalds <torvalds@linux-foundation.org>  2024-09-16 06:02:27 +0200
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-09-16 06:02:27 +0200
commit     9410645520e9b820069761f3450ef6661418e279 (patch)
tree       597a1047e43d27aceeeaf34ffefc1326aaedd0fe /net
parent     98f7e32f20d28ec452afb208f9cffc08448a2652 (diff)
parent     3561373114c8b3359114e2da27259317dc51145a (diff)
Merge tag 'net-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
 "The zero-copy changes are relatively significant, but regression risk
  should be contained. The feature needs to be used to cause trouble.
  Also it feels like we got an order of magnitude more semi-automated
  "refactoring" chaff than usual, I wonder if it's just us.

  Core & protocols:

   - Support Device Memory TCP, the ability to zero-copy receive TCP
     payloads to a DMABUF region of memory while packet headers land
     separately in normal kernel buffers, and TCP then processes them
     as usual.

   - The ability to read the PTP PHC (PTP Hardware Clock) alongside
     MONOTONIC_RAW timestamps with PTP_SYS_OFFSET_EXTENDED. Previously
     only CLOCK_REALTIME was supported.

   - Allow matching on all bits of IP DSCP for routing decisions.
     Previously we only supported matching on TOS bits in IPv4, which
     is a narrower interpretation of the same header field.

   - Increase the range of weights used for multi-path routing from
     8 bits to 16 bits.

   - Add support for IPv6 PIO p flag in the Prefix Information Option
     per draft-ietf-6man-pio-pflag.

   - IPv6 IOAM6 support for new tunsrc encap mode for better
     performance.

   - Detect destinations which blackhole MPTCP traffic and avoid
     initiating MPTCP connections to them for a certain period of
     time, 1h by default.

   - Improve IPsec control path performance by removing the inexact
     policies list.

   - AF_VSOCK: add support for SIOCOUTQ ioctl.

   - Add enum for reasons TCP reset was sent for easier tracing.

   - Add SMC ringbufs usage statistics.

  Drivers:

   - Handle netconsole setup failures more gracefully: don't fail
     loading, retain the specified target as disabled.

   - Extend bonding's IPsec offload pass-thru capabilities (ESN,
     stats).

  Filtering:

   - Add TCP_BPF_SOCK_OPS_CB_FLAGS to bpf_*sockopt() to address the
     case when long-lived sockets miss a chance to set additional
     callbacks if a sockops program was not attached early in their
     lifetime.

   - Support using BPF skb helpers in tracepoints.

   - Conntrack Netlink: support CTA_FILTER for flush.

   - Improve SCTP support in nfnetlink_queue.

   - Improve performance of large nftables flush transactions.

  Things we sprinkled into general kernel code:

   - selftests: support setting an "interpreter" for script files;
     make it easy to run, as separate cases, tests where one
     "interpreter" is fed various test descriptions (in our case
     packet sequences).

  Driver API:

   - Extend core and ethtool APIs to support many PHYs connected to a
     single interface (PHY topologies).

   - Extend cable diagnostics to specify whether Time Domain
     Reflectometry (TDR) or Active Link Cable Diagnostic (ALCD) was
     used.

   - Add library for implementing MAC-PHY Ethernet drivers for SPI
     devices compatible with Open Alliance 10BASE-T1x MAC-PHY Serial
     Interface (TC6) standard.

   - Add helpers to the PHY framework, for PHYs following the Open
     Alliance standards:
       - 1000BaseT1 link settings
       - cable test and diagnostics

   - Support listing / dumping all allocated RSS contexts.

   - Add configuration for frequency Embedded SYNC in DPLL, which
     magically embeds sync pulses into Ethernet signaling.

  Device drivers:

   - Ethernet high-speed NICs:
      - Broadcom (bnxt):
         - use better FW APIs for queue reset
         - support QOS and TPID settings for the SR-IOV VLAN
         - support dynamic MSI-X allocation
      - Intel (100G, ice, idpf):
         - ice: support PCIe subfunctions
         - iavf: add support for TC U32 filters on VFs
         - ice: support Embedded SYNC in DPLL
      - nVidia/Mellanox (mlx5):
         - support HW managed steering tables
         - support PCIe PTM cross timestamping
      - AMD/Pensando:
         - ionic: use page_pool to increase Rx performance
      - Cisco (enic):
         - report per-queue statistics

   - Ethernet virtual:
      - Microsoft vNIC:
         - mana: support configuring ring length
         - netvsc: enable more channels on systems with many CPUs
      - IBM veth:
         - optimize polling to improve TCP_RR performance
         - optimize performance of Tx handling
      - VirtIO net:
         - synchronize the operstate with the admin state to allow a
           lower virtio-net to propagate the link status to an upper
           device like macvlan

   - Ethernet NICs consumer, and embedded:
      - Add driver for Realtek automotive PCIe devices (RTL9054,
        RTL9068, RTL9072, RTL9075, RTL9068, RTL9071)
      - Add driver for Microchip LAN8650/1 10BASE-T1S MAC-PHY.
      - Microchip:
         - lan743x: use phylink
         - support WOL, EEE, pause, link settings
         - add Wake-on-LAN support for KSZ87xx family
         - add KSZ8895/KSZ8864 switch support
         - factor out FDMA code and use it in sparx5 and lan966x
           (including DCB support in both)
      - Synopsys (stmmac):
         - support frame preemption (configured using TC and ethtool)
         - support Loongson DWMAC (GMAC v3.73)
         - support Rockchip RK3576 DWMAC
      - TI:
         - am65-cpsw: add multi queue RX support
         - icssg-prueth: HSR offload support
      - Cadence (macb):
         - enable software (hrtimer based) IRQ coalescing by default
      - Xilinx (axienet):
         - expose HW statistics
         - improve multicast filtering
         - relax Rx checksum offload constraints
      - MediaTek:
         - mt7530: add EN7581 support
      - Aspeed (ftgmac100):
         - report link speed and duplex
      - Intel:
         - igc: add mqprio offload
         - igc: report EEE configuration
      - RealTek (r8169):
         - add support for RTL8126A rev.b
      - Vitesse (vsc73xx):
         - implement FDB add/del/dump operations
      - Freescale (fs_enet):
         - use phylink

   - Ethernet PHYs:
      - vitesse: implement downshift and MDI-X in vsc73xx PHYs
      - microchip: support LAN887x, supporting IEEE 802.3bw
        (100BASE-T1) and IEEE 802.3bp (1000BASE-T1) specifications
      - add Applied Micro QT2025 PHY driver (in Rust)
      - add Motorcomm yt8821 2.5G Ethernet PHY driver

   - CAN:
      - add driver for Rockchip RK3568 CAN-FD controller
      - flexcan: add wakeup support for imx95
      - kvaser_usb: set hardware timestamp on transmitted packets

   - WiFi:
      - mac80211/cfg80211:
         - EHT rate support in AQL airtime fairness
         - handle DFS (radar detection) per link in Multi-Link
           Operation
      - RealTek (rtw89):
         - support RTL8852BT and 8852BE-VT (WiFi 6)
         - support hardware rfkill
         - support HW encryption in unicast management frames
         - support Wake-on-WLAN with supported network detection
      - RealTek (rtw88):
         - improve Rx performance by using USB frame aggregation
         - support USB 3 with RTL8822CU/RTL8822BU
      - Intel (iwlwifi/mvm):
         - offload RLC/SMPS functionality to firmware
      - Marvell (mwifiex):
         - add host based MLME to enable WPA3

   - Bluetooth:
      - add support for Amlogic HCI UART protocol
      - add support for ISO data/packets to Intel and NXP drivers"

* tag 'net-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1303 commits)
  net/mlx5: HWS, check the correct variable in hws_send_ring_alloc_sq()
  netfilter: nft_socket: Fix a NULL vs IS_ERR() bug in nft_socket_cgroup_subtree_level()
  ice: Fix a NULL vs IS_ERR() check in probe()
  ice: Fix a couple NULL vs IS_ERR() bugs
  net: ethernet: fs_enet: Make the per clock optional
  net: ti: icssg-prueth: Add multicast filtering support in HSR mode
  net: ti: icssg-prueth: Enable HSR Tx duplication, Tx Tag and Rx Tag offload
  net: ti: icssg-prueth: Add support for HSR frame forward offload
  net: ti: icssg-prueth: Stop hardcoding def_inc
  net: ti: icss-iep: Move icss_iep structure
  net: ibm: emac: get rid of wol_irq
  net: ibm: emac: remove all waiting code
  net: ibm: emac: replace of_get_property
  net: ibm: emac: use netdev's phydev directly
  net: ibm: emac: use devm for register_netdev
  net: ibm: emac: remove mii_bus with devm
  net: ibm: emac: use devm for of_iomap
  net: ibm: emac: manage emac_irq with devm
  net: ibm: emac: use devm for alloc_etherdev
  octeontx2-af: debugfs: Add Channel info to RPM map
  ...
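A note on the Device Memory TCP entry above: the receive path stays regular TCP; only payload placement changes. Below is a minimal sketch of the consumer loop, adapted loosely from the documentation merged with the series. The constants (MSG_SOCK_DEVMEM, SCM_DEVMEM_DMABUF, SCM_DEVMEM_LINEAR, SO_DEVMEM_DONTNEED) and struct layouts are recalled from that series' uAPI and should be treated as approximate, not authoritative:

#include <linux/types.h>
#include <sys/socket.h>

/* Approximate copies of the structures this series adds; on a v6.12
 * tree they come from the uapi headers instead.
 */
struct dmabuf_cmsg {
        __u64 frag_offset;      /* where the payload sits in the dmabuf */
        __u32 frag_size;
        __u32 frag_token;       /* handed back once the data is consumed */
        __u32 dmabuf_id;
        __u32 flags;
};

struct dmabuf_token {
        __u32 token_start;
        __u32 token_count;
};

static void devmem_recv_once(int fd)
{
        char ctrl[4096];
        struct msghdr msg = {
                .msg_control = ctrl,
                .msg_controllen = sizeof(ctrl),
        };
        struct cmsghdr *cm;

        if (recvmsg(fd, &msg, MSG_SOCK_DEVMEM) < 0)
                return;

        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                struct dmabuf_cmsg *dc = (struct dmabuf_cmsg *)CMSG_DATA(cm);
                struct dmabuf_token tok;

                if (cm->cmsg_level != SOL_SOCKET ||
                    cm->cmsg_type != SCM_DEVMEM_DMABUF)
                        continue; /* SCM_DEVMEM_LINEAR: data in host memory */

                /* The payload lives at dc->frag_offset inside the dmabuf
                 * the queue was bound to; consume it there (e.g. via a
                 * device copy), then return the pages to the kernel:
                 */
                tok.token_start = dc->frag_token;
                tok.token_count = 1;
                setsockopt(fd, SOL_SOCKET, SO_DEVMEM_DONTNEED,
                           &tok, sizeof(tok));
        }
}

Because the CPU cannot dereference these payload frags, several hunks below add skb_frags_readable() checks (datagram.c, dev.c) that bail out before the stack would touch the frag pages.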
Diffstat (limited to 'net')
-rw-r--r--  net/6lowpan/ndisc.c  6
-rw-r--r--  net/8021q/vlan_dev.c  10
-rw-r--r--  net/8021q/vlanproc.c  4
-rw-r--r--  net/Kconfig  6
-rw-r--r--  net/batman-adv/soft-interface.c  5
-rw-r--r--  net/bluetooth/cmtp/Kconfig  4
-rw-r--r--  net/bluetooth/cmtp/capi.c  32
-rw-r--r--  net/bluetooth/hci_conn.c  7
-rw-r--r--  net/bluetooth/hci_sync.c  5
-rw-r--r--  net/bluetooth/leds.c  2
-rw-r--r--  net/bluetooth/mgmt.c  13
-rw-r--r--  net/bridge/br_device.c  6
-rw-r--r--  net/bridge/br_netfilter_hooks.c  3
-rw-r--r--  net/bridge/netfilter/ebtables.c  2
-rw-r--r--  net/bridge/netfilter/nft_meta_bridge.c  7
-rw-r--r--  net/bridge/netfilter/nft_reject_bridge.c  3
-rw-r--r--  net/caif/cfpkt_skbuff.c  6
-rw-r--r--  net/caif/chnl_net.c  2
-rw-r--r--  net/can/bcm.c  4
-rw-r--r--  net/can/j1939/transport.c  8
-rw-r--r--  net/core/Makefile  2
-rw-r--r--  net/core/datagram.c  6
-rw-r--r--  net/core/dev.c  89
-rw-r--r--  net/core/dev_addr_lists.c  6
-rw-r--r--  net/core/dev_ioctl.c  9
-rw-r--r--  net/core/devmem.c  389
-rw-r--r--  net/core/devmem.h  180
-rw-r--r--  net/core/fib_rules.c  9
-rw-r--r--  net/core/filter.c  25
-rw-r--r--  net/core/gro.c  5
-rw-r--r--  net/core/lwt_bpf.c  3
-rw-r--r--  net/core/mp_dmabuf_devmem.h  44
-rw-r--r--  net/core/neighbour.c  3
-rw-r--r--  net/core/net-sysfs.c  13
-rw-r--r--  net/core/net_namespace.c  84
-rw-r--r--  net/core/netdev-genl-gen.c  23
-rw-r--r--  net/core/netdev-genl-gen.h  6
-rw-r--r--  net/core/netdev-genl.c  147
-rw-r--r--  net/core/netdev_rx_queue.c  81
-rw-r--r--  net/core/netmem_priv.h  31
-rw-r--r--  net/core/netpoll.c  44
-rw-r--r--  net/core/page_pool.c  119
-rw-r--r--  net/core/page_pool_priv.h  46
-rw-r--r--  net/core/page_pool_user.c  32
-rw-r--r--  net/core/pktgen.c  10
-rw-r--r--  net/core/rtnetlink.c  5
-rw-r--r--  net/core/skbuff.c  136
-rw-r--r--  net/core/skmsg.c  2
-rw-r--r--  net/core/sock.c  74
-rw-r--r--  net/core/sock_map.c  1
-rw-r--r--  net/core/sock_reuseport.c  5
-rw-r--r--  net/core/utils.c  2
-rw-r--r--  net/dsa/tag_ksz.c  11
-rw-r--r--  net/dsa/user.c  3
-rw-r--r--  net/ethtool/Makefile  3
-rw-r--r--  net/ethtool/cabletest.c  57
-rw-r--r--  net/ethtool/channels.c  20
-rw-r--r--  net/ethtool/cmis.h  1
-rw-r--r--  net/ethtool/cmis_cdb.c  14
-rw-r--r--  net/ethtool/common.c  61
-rw-r--r--  net/ethtool/common.h  7
-rw-r--r--  net/ethtool/ioctl.c  44
-rw-r--r--  net/ethtool/linkinfo.c  2
-rw-r--r--  net/ethtool/linkmodes.c  2
-rw-r--r--  net/ethtool/netlink.c  68
-rw-r--r--  net/ethtool/netlink.h  37
-rw-r--r--  net/ethtool/phy.c  306
-rw-r--r--  net/ethtool/plca.c  30
-rw-r--r--  net/ethtool/pse-pd.c  38
-rw-r--r--  net/ethtool/rss.c  233
-rw-r--r--  net/ethtool/strset.c  27
-rw-r--r--  net/handshake/netlink.c  4
-rw-r--r--  net/hsr/hsr_device.c  46
-rw-r--r--  net/hsr/hsr_forward.c  4
-rw-r--r--  net/hsr/hsr_main.h  5
-rw-r--r--  net/hsr/hsr_netlink.c  2
-rw-r--r--  net/hsr/hsr_slave.c  11
-rw-r--r--  net/ieee802154/6lowpan/core.c  2
-rw-r--r--  net/ieee802154/core.c  10
-rw-r--r--  net/ipv4/Kconfig  3
-rw-r--r--  net/ipv4/devinet.c  53
-rw-r--r--  net/ipv4/esp4.c  3
-rw-r--r--  net/ipv4/fib_frontend.c  4
-rw-r--r--  net/ipv4/fib_rules.c  54
-rw-r--r--  net/ipv4/fib_semantics.c  6
-rw-r--r--  net/ipv4/fib_trie.c  3
-rw-r--r--  net/ipv4/icmp.c  119
-rw-r--r--  net/ipv4/inet_connection_sock.c  2
-rw-r--r--  net/ipv4/inet_diag.c  4
-rw-r--r--  net/ipv4/inet_hashtables.c  12
-rw-r--r--  net/ipv4/ip_gre.c  7
-rw-r--r--  net/ipv4/ip_input.c  6
-rw-r--r--  net/ipv4/ip_output.c  5
-rw-r--r--  net/ipv4/ip_tunnel.c  15
-rw-r--r--  net/ipv4/ip_vti.c  2
-rw-r--r--  net/ipv4/ipip.c  2
-rw-r--r--  net/ipv4/ipmr.c  12
-rw-r--r--  net/ipv4/netfilter.c  3
-rw-r--r--  net/ipv4/netfilter/arp_tables.c  4
-rw-r--r--  net/ipv4/netfilter/ip_tables.c  4
-rw-r--r--  net/ipv4/netfilter/ipt_rpfilter.c  3
-rw-r--r--  net/ipv4/netfilter/nf_dup_ipv4.c  3
-rw-r--r--  net/ipv4/netfilter/nft_dup_ipv4.c  4
-rw-r--r--  net/ipv4/netfilter/nft_fib_ipv4.c  5
-rw-r--r--  net/ipv4/nexthop.c  55
-rw-r--r--  net/ipv4/route.c  16
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c  32
-rw-r--r--  net/ipv4/tcp.c  291
-rw-r--r--  net/ipv4/tcp_bpf.c  4
-rw-r--r--  net/ipv4/tcp_htcp.c  2
-rw-r--r--  net/ipv4/tcp_input.c  13
-rw-r--r--  net/ipv4/tcp_ipv4.c  23
-rw-r--r--  net/ipv4/tcp_metrics.c  10
-rw-r--r--  net/ipv4/tcp_minisocks.c  33
-rw-r--r--  net/ipv4/tcp_output.c  7
-rw-r--r--  net/ipv4/tcp_timer.c  7
-rw-r--r--  net/ipv4/udp.c  11
-rw-r--r--  net/ipv4/udp_tunnel_core.c  3
-rw-r--r--  net/ipv6/addrconf.c  26
-rw-r--r--  net/ipv6/af_inet6.c  1
-rw-r--r--  net/ipv6/esp6.c  3
-rw-r--r--  net/ipv6/fib6_rules.c  43
-rw-r--r--  net/ipv6/icmp.c  28
-rw-r--r--  net/ipv6/inet6_hashtables.c  15
-rw-r--r--  net/ipv6/ioam6_iptunnel.c  86
-rw-r--r--  net/ipv6/ip6_gre.c  7
-rw-r--r--  net/ipv6/ip6_input.c  6
-rw-r--r--  net/ipv6/ip6_tunnel.c  11
-rw-r--r--  net/ipv6/ip6mr.c  5
-rw-r--r--  net/ipv6/ipv6_sockglue.c  4
-rw-r--r--  net/ipv6/mcast.c  5
-rw-r--r--  net/ipv6/ndisc.c  6
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c  2
-rw-r--r--  net/ipv6/netfilter/nft_dup_ipv6.c  4
-rw-r--r--  net/ipv6/route.c  8
-rw-r--r--  net/ipv6/rpl_iptunnel.c  12
-rw-r--r--  net/ipv6/sit.c  11
-rw-r--r--  net/ipv6/tcp_ipv6.c  5
-rw-r--r--  net/ipv6/udp.c  8
-rw-r--r--  net/l2tp/l2tp_core.c  382
-rw-r--r--  net/l2tp/l2tp_core.h  25
-rw-r--r--  net/l2tp/l2tp_debugfs.c  24
-rw-r--r--  net/l2tp/l2tp_eth.c  44
-rw-r--r--  net/l2tp/l2tp_ip.c  125
-rw-r--r--  net/l2tp/l2tp_ip6.c  123
-rw-r--r--  net/l2tp/l2tp_netlink.c  76
-rw-r--r--  net/l2tp/l2tp_ppp.c  154
-rw-r--r--  net/mac80211/agg-rx.c  15
-rw-r--r--  net/mac80211/agg-tx.c  15
-rw-r--r--  net/mac80211/airtime.c  140
-rw-r--r--  net/mac80211/cfg.c  49
-rw-r--r--  net/mac80211/chan.c  5
-rw-r--r--  net/mac80211/ht.c  15
-rw-r--r--  net/mac80211/ieee80211_i.h  33
-rw-r--r--  net/mac80211/iface.c  25
-rw-r--r--  net/mac80211/link.c  12
-rw-r--r--  net/mac80211/main.c  6
-rw-r--r--  net/mac80211/mesh_pathtbl.c  2
-rw-r--r--  net/mac80211/mlme.c  45
-rw-r--r--  net/mac80211/offchannel.c  1
-rw-r--r--  net/mac80211/pm.c  2
-rw-r--r--  net/mac80211/rate.c  2
-rw-r--r--  net/mac80211/scan.c  16
-rw-r--r--  net/mac80211/status.c  1
-rw-r--r--  net/mac80211/tx.c  2
-rw-r--r--  net/mac80211/util.c  100
-rw-r--r--  net/mctp/af_mctp.c  3
-rw-r--r--  net/mpls/af_mpls.c  6
-rw-r--r--  net/mpls/mpls_iptunnel.c  2
-rw-r--r--  net/mptcp/ctrl.c  133
-rw-r--r--  net/mptcp/mib.c  7
-rw-r--r--  net/mptcp/mib.h  7
-rw-r--r--  net/mptcp/pm.c  11
-rw-r--r--  net/mptcp/pm_netlink.c  78
-rw-r--r--  net/mptcp/pm_userspace.c  40
-rw-r--r--  net/mptcp/protocol.c  18
-rw-r--r--  net/mptcp/protocol.h  33
-rw-r--r--  net/mptcp/subflow.c  54
-rw-r--r--  net/netfilter/core.c  4
-rw-r--r--  net/netfilter/nf_conncount.c  15
-rw-r--r--  net/netfilter/nf_conntrack_core.c  2
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c  9
-rw-r--r--  net/netfilter/nf_nat_core.c  2
-rw-r--r--  net/netfilter/nf_tables_api.c  201
-rw-r--r--  net/netfilter/nf_tables_core.c  2
-rw-r--r--  net/netfilter/nfnetlink.c  14
-rw-r--r--  net/netfilter/nfnetlink_queue.c  12
-rw-r--r--  net/netfilter/nft_bitwise.c  4
-rw-r--r--  net/netfilter/nft_byteorder.c  2
-rw-r--r--  net/netfilter/nft_cmp.c  6
-rw-r--r--  net/netfilter/nft_compat.c  6
-rw-r--r--  net/netfilter/nft_counter.c  90
-rw-r--r--  net/netfilter/nft_ct.c  2
-rw-r--r--  net/netfilter/nft_dup_netdev.c  2
-rw-r--r--  net/netfilter/nft_dynset.c  22
-rw-r--r--  net/netfilter/nft_exthdr.c  2
-rw-r--r--  net/netfilter/nft_fib.c  3
-rw-r--r--  net/netfilter/nft_flow_offload.c  6
-rw-r--r--  net/netfilter/nft_fwd_netdev.c  9
-rw-r--r--  net/netfilter/nft_hash.c  2
-rw-r--r--  net/netfilter/nft_immediate.c  3
-rw-r--r--  net/netfilter/nft_lookup.c  5
-rw-r--r--  net/netfilter/nft_masq.c  7
-rw-r--r--  net/netfilter/nft_meta.c  8
-rw-r--r--  net/netfilter/nft_nat.c  11
-rw-r--r--  net/netfilter/nft_objref.c  2
-rw-r--r--  net/netfilter/nft_osf.c  3
-rw-r--r--  net/netfilter/nft_payload.c  2
-rw-r--r--  net/netfilter/nft_queue.c  5
-rw-r--r--  net/netfilter/nft_range.c  2
-rw-r--r--  net/netfilter/nft_redir.c  7
-rw-r--r--  net/netfilter/nft_reject.c  3
-rw-r--r--  net/netfilter/nft_reject_inet.c  3
-rw-r--r--  net/netfilter/nft_reject_netdev.c  3
-rw-r--r--  net/netfilter/nft_rt.c  3
-rw-r--r--  net/netfilter/nft_socket.c  7
-rw-r--r--  net/netfilter/nft_synproxy.c  3
-rw-r--r--  net/netfilter/nft_tproxy.c  7
-rw-r--r--  net/netfilter/nft_xfrm.c  3
-rw-r--r--  net/netfilter/xt_connlimit.c  15
-rw-r--r--  net/netlink/af_netlink.h  1
-rw-r--r--  net/netrom/nr_route.c  4
-rw-r--r--  net/openvswitch/actions.c  8
-rw-r--r--  net/openvswitch/conntrack.c  35
-rw-r--r--  net/openvswitch/datapath.h  3
-rw-r--r--  net/openvswitch/flow_netlink.c  2
-rw-r--r--  net/openvswitch/vport-internal_dev.c  11
-rw-r--r--  net/packet/af_packet.c  4
-rw-r--r--  net/rds/Kconfig  9
-rw-r--r--  net/rds/Makefile  5
-rw-r--r--  net/rds/ib.h  4
-rw-r--r--  net/rfkill/core.c  8
-rw-r--r--  net/rfkill/rfkill-gpio.c  18
-rw-r--r--  net/rxrpc/ar-internal.h  2
-rw-r--r--  net/sched/act_ct.c  4
-rw-r--r--  net/sched/act_vlan.c  1
-rw-r--r--  net/sched/sch_cake.c  53
-rw-r--r--  net/sched/sch_taprio.c  4
-rw-r--r--  net/sctp/protocol.c  3
-rw-r--r--  net/smc/af_smc.c  8
-rw-r--r--  net/smc/smc_clc.h  4
-rw-r--r--  net/smc/smc_core.c  72
-rw-r--r--  net/smc/smc_core.h  2
-rw-r--r--  net/smc/smc_loopback.h  1
-rw-r--r--  net/smc/smc_pnet.c  3
-rw-r--r--  net/smc/smc_stats.c  6
-rw-r--r--  net/smc/smc_stats.h  28
-rw-r--r--  net/smc/smc_sysctl.c  11
-rw-r--r--  net/socket.c  10
-rw-r--r--  net/tipc/bcast.c  2
-rw-r--r--  net/tipc/bearer.c  10
-rw-r--r--  net/tipc/monitor.c  2
-rw-r--r--  net/tipc/socket.c  6
-rw-r--r--  net/tls/tls_sw.c  2
-rw-r--r--  net/unix/af_unix.c  80
-rw-r--r--  net/unix/garbage.c  16
-rw-r--r--  net/vmw_vsock/af_vsock.c  58
-rw-r--r--  net/vmw_vsock/virtio_transport.c  4
-rw-r--r--  net/vmw_vsock/virtio_transport_common.c  35
-rw-r--r--  net/vmw_vsock/vsock_loopback.c  6
-rw-r--r--  net/wireless/core.c  10
-rw-r--r--  net/wireless/core.h  8
-rw-r--r--  net/wireless/ibss.c  2
-rw-r--r--  net/wireless/lib80211.c  10
-rw-r--r--  net/wireless/lib80211_crypt_ccmp.c  2
-rw-r--r--  net/wireless/lib80211_crypt_tkip.c  2
-rw-r--r--  net/wireless/lib80211_crypt_wep.c  2
-rw-r--r--  net/wireless/mesh.c  2
-rw-r--r--  net/wireless/mlme.c  20
-rw-r--r--  net/wireless/nl80211.c  77
-rw-r--r--  net/wireless/rdev-ops.h  13
-rw-r--r--  net/wireless/reg.c  19
-rw-r--r--  net/wireless/scan.c  45
-rw-r--r--  net/wireless/sme.c  3
-rw-r--r--  net/wireless/trace.h  40
-rw-r--r--  net/wireless/util.c  14
-rw-r--r--  net/xdp/xsk_buff_pool.c  40
-rw-r--r--  net/xdp/xsk_queue.h  5
-rw-r--r--  net/xfrm/xfrm_device.c  6
-rw-r--r--  net/xfrm/xfrm_interface_core.c  2
-rw-r--r--  net/xfrm/xfrm_policy.c  225
281 files changed, 5349 insertions, 2048 deletions
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index 16be8f8b2f8c..c40b98f7743c 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -11,11 +11,6 @@
#include "6lowpan_i.h"
-static int lowpan_ndisc_is_useropt(u8 nd_opt_type)
-{
- return nd_opt_type == ND_OPT_6CO;
-}
-
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
#define NDISC_802154_SHORT_ADDR_LENGTH 1
static int lowpan_ndisc_parse_802154_options(const struct net_device *dev,
@@ -222,7 +217,6 @@ static void lowpan_ndisc_prefix_rcv_add_addr(struct net *net,
#endif
const struct ndisc_ops lowpan_ndisc_ops = {
- .is_useropt = lowpan_ndisc_is_useropt,
#if IS_ENABLED(CONFIG_IEEE802154_6LOWPAN)
.parse_options = lowpan_ndisc_parse_options,
.update = lowpan_ndisc_update,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 217be32426b5..458040e8a0e0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -564,17 +564,20 @@ static int vlan_dev_init(struct net_device *dev)
NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
NETIF_F_GSO_ENCAP_ALL |
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
- NETIF_F_ALL_FCOE;
+ NETIF_F_FCOE_CRC | NETIF_F_FSO;
if (real_dev->vlan_features & NETIF_F_HW_MACSEC)
dev->hw_features |= NETIF_F_HW_MACSEC;
- dev->features |= dev->hw_features | NETIF_F_LLTX;
+ dev->features |= dev->hw_features;
+ dev->lltx = true;
+ dev->fcoe_mtu = true;
netif_inherit_tso_max(dev, real_dev);
if (dev->features & NETIF_F_VLAN_FEATURES)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
- dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
+ dev->vlan_features = real_dev->vlan_features &
+ ~(NETIF_F_FCOE_CRC | NETIF_F_FSO);
dev->hw_enc_features = vlan_tnl_features(real_dev);
dev->mpls_features = real_dev->mpls_features;
@@ -655,7 +658,6 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
lower_features |= NETIF_F_HW_CSUM;
features = netdev_intersect_features(features, lower_features);
features |= old_features & (NETIF_F_SOFT_FEATURES | NETIF_F_GSO_SOFTWARE);
- features |= NETIF_F_LLTX;
return features;
}
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 87b959da00cd..fa67374bda49 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -238,9 +238,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
stats = dev_get_stats(vlandev, &temp);
seq_printf(seq,
- "%s VID: %d REORDER_HDR: %i dev->priv_flags: %llx\n",
+ "%s VID: %d REORDER_HDR: %i dev->priv_flags: %x\n",
vlandev->name, vlan->vlan_id,
- (int)(vlan->flags & 1), vlandev->priv_flags);
+ (int)(vlan->flags & 1), (u32)vlandev->priv_flags);
seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
diff --git a/net/Kconfig b/net/Kconfig
index d27d0deac0bf..a629f92dc86b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -66,6 +66,12 @@ config SKB_DECRYPTED
config SKB_EXTENSIONS
bool
+config NET_DEVMEM
+ def_bool y
+ depends on DMA_SHARED_BUFFER
+ depends on GENERIC_ALLOCATOR
+ depends on PAGE_POOL
+
menu "Networking options"
source "net/packet/Kconfig"
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 30ecbc2ef1fd..2758aba47a2f 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1020,9 +1020,10 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->netdev_ops = &batadv_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = batadv_softif_free;
- dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_NETNS_LOCAL;
- dev->features |= NETIF_F_LLTX;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->lltx = true;
+ dev->netns_local = true;
/* can't call min_mtu, because the needed variables
* have not been initialized yet
diff --git a/net/bluetooth/cmtp/Kconfig b/net/bluetooth/cmtp/Kconfig
index c8337786da6b..34e923466236 100644
--- a/net/bluetooth/cmtp/Kconfig
+++ b/net/bluetooth/cmtp/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config BT_CMTP
- tristate "CMTP protocol support"
- depends on BT_BREDR && ISDN_CAPI
+ tristate "CMTP protocol support (DEPRECATED)"
+ depends on BT_BREDR && ISDN_CAPI && DEPRECATED
help
CMTP (CAPI Message Transport Protocol) is a transport layer
for CAPI messages. CMTP is required for the Bluetooth Common
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index f3bedc3b613a..884703fda979 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -248,18 +248,10 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
break;
case CAPI_FUNCTION_GET_MANUFACTURER:
- if (skb->len < CAPI_MSG_BASELEN + 15)
- break;
-
- if (!info && ctrl) {
- int len = min_t(uint, CAPI_MANUFACTURER_LEN,
- skb->data[CAPI_MSG_BASELEN + 14]);
-
- memset(ctrl->manu, 0, CAPI_MANUFACTURER_LEN);
- strncpy(ctrl->manu,
- skb->data + CAPI_MSG_BASELEN + 15, len);
- }
-
+ if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 14)
+ strscpy_pad(ctrl->manu,
+ skb->data + CAPI_MSG_BASELEN + 15,
+ skb->data[CAPI_MSG_BASELEN + 14]);
break;
case CAPI_FUNCTION_GET_VERSION:
@@ -276,18 +268,10 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
break;
case CAPI_FUNCTION_GET_SERIAL_NUMBER:
- if (skb->len < CAPI_MSG_BASELEN + 17)
- break;
-
- if (!info && ctrl) {
- int len = min_t(uint, CAPI_SERIAL_LEN,
- skb->data[CAPI_MSG_BASELEN + 16]);
-
- memset(ctrl->serial, 0, CAPI_SERIAL_LEN);
- strncpy(ctrl->serial,
- skb->data + CAPI_MSG_BASELEN + 17, len);
- }
-
+ if (!info && ctrl && skb->len > CAPI_MSG_BASELEN + 16)
+ strscpy_pad(ctrl->serial,
+ skb->data + CAPI_MSG_BASELEN + 17,
+ skb->data[CAPI_MSG_BASELEN + 16]);
break;
}
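The strncpy() conversions above can drop the manual memset() because strscpy_pad() handles termination and padding itself. A rough userspace model of its semantics — an illustration of the behavior, not the kernel implementation:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Model of strscpy_pad(): copy at most size-1 bytes, always
 * NUL-terminate, zero-fill the rest of the buffer, and return the
 * copied length or -E2BIG on truncation.
 */
static long model_strscpy_pad(char *dst, const char *src, size_t size)
{
        size_t len = strnlen(src, size);

        if (!size)
                return -E2BIG;
        if (len == size) {                      /* src does not fit */
                memcpy(dst, src, size - 1);
                dst[size - 1] = '\0';
                return -E2BIG;
        }
        memcpy(dst, src, len);
        memset(dst + len, 0, size - len);       /* pad, incl. the NUL */
        return (long)len;
}

int main(void)
{
        char buf[8];

        printf("%ld '%s'\n", model_strscpy_pad(buf, "hi", sizeof(buf)), buf);
        printf("%ld '%s'\n", model_strscpy_pad(buf, "truncated!", sizeof(buf)), buf);
        return 0;
}

Unlike the old strncpy() pattern, the destination can never be left unterminated, and the trailing bytes are deterministically zero rather than depending on a separate memset() being remembered.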
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index c82502e213a8..d083117ee36c 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -106,8 +106,7 @@ void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
* where a timeout + cancel does indicate an actual failure.
*/
if (status && status != HCI_ERROR_UNKNOWN_CONN_ID)
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, status);
+ mgmt_connect_failed(hdev, conn, status);
/* The connection attempt was doing scan for new RPA, and is
* in scan phase. If params are not associated with any other
@@ -778,7 +777,6 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c
if (!d)
return -ENOMEM;
- memset(d, 0, sizeof(*d));
d->big = big;
d->sync_handle = conn->sync_handle;
@@ -1250,8 +1248,7 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
hci_le_conn_failed(conn, status);
break;
case ACL_LINK:
- mgmt_connect_failed(hdev, &conn->dst, conn->type,
- conn->dst_type, status);
+ mgmt_connect_failed(hdev, conn, status);
break;
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 5533e6f561b3..40ccdef168d7 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -5380,7 +5380,10 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
if (!e)
return 0;
- return hci_remote_name_cancel_sync(hdev, &e->data.bdaddr);
+ /* Ignore cancel errors since they shouldn't interfere with stopping
+ * of the discovery.
+ */
+ hci_remote_name_cancel_sync(hdev, &e->data.bdaddr);
}
return 0;
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
index f46847632ffa..6e349704efe4 100644
--- a/net/bluetooth/leds.c
+++ b/net/bluetooth/leds.c
@@ -48,7 +48,7 @@ static int power_activate(struct led_classdev *led_cdev)
htrig = to_hci_basic_led_trigger(led_cdev->trigger);
powered = test_bit(HCI_UP, &htrig->hdev->flags);
- led_trigger_event(led_cdev->trigger, powered ? LED_FULL : LED_OFF);
+ led_set_brightness(led_cdev, powered ? LED_FULL : LED_OFF);
return 0;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 279902e8bd8a..e4f564d6f6fb 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -9779,13 +9779,18 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
mgmt_pending_remove(cmd);
}
-void mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
- u8 addr_type, u8 status)
+void mgmt_connect_failed(struct hci_dev *hdev, struct hci_conn *conn, u8 status)
{
struct mgmt_ev_connect_failed ev;
- bacpy(&ev.addr.bdaddr, bdaddr);
- ev.addr.type = link_to_bdaddr(link_type, addr_type);
+ if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags)) {
+ mgmt_device_disconnected(hdev, &conn->dst, conn->type,
+ conn->dst_type, status, true);
+ return;
+ }
+
+ bacpy(&ev.addr.bdaddr, &conn->dst);
+ ev.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
ev.status = mgmt_status(status);
mgmt_event(MGMT_EV_CONNECT_FAILED, hdev, &ev, sizeof(ev), NULL);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index fb1115857e49..26b79feb385d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -487,9 +487,11 @@ void br_dev_setup(struct net_device *dev)
dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
+ dev->lltx = true;
+ dev->netns_local = true;
- dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
- NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+ dev->features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_STAG_TX;
dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
dev->vlan_features = COMMON_FEATURES;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 8f9c19d992ac..0e8bc0ea6175 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -36,6 +36,7 @@
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
#include <net/netns/generic.h>
+#include <net/inet_dscp.h>
#include <linux/uaccess.h>
#include "br_private.h"
@@ -402,7 +403,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
- RT_TOS(iph->tos), 0,
+ iph->tos & INET_DSCP_MASK, 0,
RT_SCOPE_UNIVERSE);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cbd0e3586c3f..3e67d4aff419 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1256,7 +1256,7 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
goto free_unlock;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
if (newinfo->nentries)
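kmemdup_array(), used above in place of kmemdup() with an open-coded multiplication, exists so that the count-times-size computation cannot silently wrap. A userspace model of the idea (the kernel version uses its checked size arithmetic; this sketch only shows the failure mode being avoided):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Model of kmemdup_array(): duplicate n elements of 'size' bytes,
 * refusing the allocation instead of wrapping on overflow.
 */
static void *model_kmemdup_array(const void *src, size_t n, size_t size)
{
        void *p;

        if (n && size > SIZE_MAX / n)   /* n * size would wrap here; a
                                         * plain kmemdup(src, n * size)
                                         * would get an undersized buffer */
                return NULL;

        p = malloc(n * size);
        if (p)
                memcpy(p, src, n * size);
        return p;
}

int main(void)
{
        int ops[4] = { 1, 2, 3, 4 };
        void *copy = model_kmemdup_array(ops, 4, sizeof(ops[0]));

        free(copy);
        return 0;
}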
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index bd4d1b4d745f..d12a221366d6 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -142,7 +142,7 @@ static int nft_meta_bridge_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
@@ -168,8 +168,7 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track,
}
static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@@ -179,7 +178,7 @@ static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
hooks = 1 << NF_BR_PRE_ROUTING;
break;
default:
- return nft_meta_set_validate(ctx, expr, data);
+ return nft_meta_set_validate(ctx, expr);
}
return nft_chain_validate_hooks(ctx->chain, hooks);
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 71b54fed7263..1cb5c16e97b7 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -170,8 +170,7 @@ out:
}
static int nft_reject_bridge_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_BR_PRE_ROUTING) |
(1 << NF_BR_LOCAL_IN));
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 2ae8cfa3df88..96236d21b18e 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -298,10 +298,8 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
return dstpkt;
}
- if (expectlen > addlen)
- neededtailspace = expectlen;
- else
- neededtailspace = addlen;
+
+ neededtailspace = max(expectlen, addlen);
if (dst->tail + neededtailspace > dst->end) {
/* Create a dumplicate of 'dst' with more tail space */
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 47901bd4def1..94ad09e36df2 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -47,7 +47,6 @@ struct chnl_net {
struct caif_connect_request conn_req;
struct list_head list_field;
struct net_device *netdev;
- char name[256];
wait_queue_head_t netmgmt_wq;
/* Flow status to remember and control the transmission. */
bool flowenabled;
@@ -347,7 +346,6 @@ static int chnl_net_init(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- strncpy(priv->name, dev->name, sizeof(priv->name));
INIT_LIST_HEAD(&priv->list_field);
return 0;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 46d3ec3aa44b..217049fa496e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1471,8 +1471,10 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
/* remove device reference, if this is our bound device */
if (bo->bound && bo->ifindex == dev->ifindex) {
#if IS_ENABLED(CONFIG_PROC_FS)
- if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read)
+ if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) {
remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir);
+ bo->bcm_proc_read = NULL;
+ }
#endif
bo->bound = 0;
bo->ifindex = 0;
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 4be73de5033c..319f47df3330 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1179,10 +1179,10 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
break;
case -ENETDOWN:
/* In this case we should get a netdev_event(), all active
- * sessions will be cleared by
- * j1939_cancel_all_active_sessions(). So handle this as an
- * error, but let j1939_cancel_all_active_sessions() do the
- * cleanup including propagation of the error to user space.
+ * sessions will be cleared by j1939_cancel_active_session().
+ * So handle this as an error, but let
+ * j1939_cancel_active_session() do the cleanup including
+ * propagation of the error to user space.
*/
break;
case -EOVERFLOW:
diff --git a/net/core/Makefile b/net/core/Makefile
index 62be9aef2528..c3ebbaf9c81e 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
obj-y += hotdata.o
+obj-y += netdev_rx_queue.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
@@ -43,3 +44,4 @@ obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
+obj-$(CONFIG_NET_DEVMEM) += devmem.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a40f733b37d7..f0693707aece 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -407,6 +407,9 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
return 0;
}
+ if (!skb_frags_readable(skb))
+ goto short_copy;
+
/* Copy paged appendix. Hmm... why does this look so complicated? */
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
@@ -623,6 +626,9 @@ int zerocopy_fill_skb_from_iter(struct sk_buff *skb,
{
int frag = skb_shinfo(skb)->nr_frags;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
while (length && iov_iter_count(from)) {
struct page *head, *last_head = NULL;
struct page *pages[MAX_SKB_FRAGS];
diff --git a/net/core/dev.c b/net/core/dev.c
index f66e61407883..1e740faf9e78 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -158,8 +158,10 @@
#include <net/page_pool/types.h>
#include <net/page_pool/helpers.h>
#include <net/rps.h>
+#include <linux/phy_link_topology.h>
#include "dev.h"
+#include "devmem.h"
#include "net-sysfs.h"
static DEFINE_SPINLOCK(ptype_lock);
@@ -3310,6 +3312,10 @@ int skb_checksum_help(struct sk_buff *skb)
return -EINVAL;
}
+ if (!skb_frags_readable(skb)) {
+ return -EFAULT;
+ }
+
/* Before computing a checksum, we should make sure no frag could
* be modified by an external entity : checksum could be wrong.
*/
@@ -3386,6 +3392,7 @@ int skb_crc32c_csum_help(struct sk_buff *skb)
out:
return ret;
}
+EXPORT_SYMBOL(skb_crc32c_csum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
@@ -3431,8 +3438,9 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
if (!(dev->features & NETIF_F_HIGHDMA)) {
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = skb_frag_page(frag);
- if (PageHighMem(skb_frag_page(frag)))
+ if (page && PageHighMem(page))
return 1;
}
}
@@ -3705,7 +3713,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
next = skb->next;
skb_mark_not_on_list(skb);
- /* in case skb wont be segmented, point to itself */
+ /* in case skb won't be segmented, point to itself */
skb->prev = skb;
skb = validate_xmit_skb(skb, dev, again);
@@ -4245,13 +4253,6 @@ u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
}
EXPORT_SYMBOL(dev_pick_tx_zero);
-u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
- struct net_device *sb_dev)
-{
- return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
-}
-EXPORT_SYMBOL(dev_pick_tx_cpu_id);
-
u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
@@ -5725,10 +5726,9 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct packet_type *pt_curr = NULL;
/* Current (common) orig_dev of sublist */
struct net_device *od_curr = NULL;
- struct list_head sublist;
struct sk_buff *skb, *next;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *orig_dev = skb->dev;
struct packet_type *pt_prev = NULL;
@@ -5866,9 +5866,8 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
void netif_receive_skb_list_internal(struct list_head *head)
{
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
net_timestamp_check(READ_ONCE(net_hotdata.tstamp_prequeue),
skb);
@@ -9272,7 +9271,7 @@ EXPORT_SYMBOL(netdev_port_same_parent_id);
*/
int dev_change_proto_down(struct net_device *dev, bool proto_down)
{
- if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN))
+ if (!dev->change_proto_down)
return -EOPNOTSUPP;
if (!netif_device_present(dev))
return -ENODEV;
@@ -9369,6 +9368,20 @@ u8 dev_xdp_prog_count(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
+int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ if (!dev->netdev_ops->ndo_bpf)
+ return -EOPNOTSUPP;
+
+ if (dev_get_min_mp_channel_count(dev)) {
+ NL_SET_ERR_MSG(bpf->extack, "unable to propagate XDP to device using memory provider");
+ return -EBUSY;
+ }
+
+ return dev->netdev_ops->ndo_bpf(dev, bpf);
+}
+EXPORT_SYMBOL_GPL(dev_xdp_propagate);
+
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
struct bpf_prog *prog = dev_xdp_prog(dev, mode);
@@ -9397,6 +9410,11 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
struct netdev_bpf xdp;
int err;
+ if (dev_get_min_mp_channel_count(dev)) {
+ NL_SET_ERR_MSG(extack, "unable to install XDP to device using memory provider");
+ return -EBUSY;
+ }
+
memset(&xdp, 0, sizeof(xdp));
xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
xdp.extack = extack;
@@ -9821,6 +9839,20 @@ err_out:
return err;
}
+u32 dev_get_min_mp_channel_count(const struct net_device *dev)
+{
+ int i;
+
+ ASSERT_RTNL();
+
+ for (i = dev->real_num_rx_queues - 1; i >= 0; i--)
+ if (dev->_rx[i].mp_params.mp_priv)
+ /* The channel count is the idx plus 1. */
+ return i + 1;
+
+ return 0;
+}
+
/**
* dev_index_reserve() - allocate an ifindex in a namespace
* @net: the applicable net namespace
@@ -10321,6 +10353,17 @@ static void netdev_do_free_pcpu_stats(struct net_device *dev)
}
}
+static void netdev_free_phy_link_topology(struct net_device *dev)
+{
+ struct phy_link_topology *topo = dev->link_topo;
+
+ if (IS_ENABLED(CONFIG_PHYLIB) && topo) {
+ xa_destroy(&topo->phys);
+ kfree(topo);
+ dev->link_topo = NULL;
+ }
+}
+
/**
* register_netdevice() - register a network device
* @dev: device to register
@@ -10868,7 +10911,7 @@ noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset)
return;
}
- field = (__force unsigned long __percpu *)((__force void *)p + offset);
+ field = (unsigned long __percpu *)((void __percpu *)p + offset);
this_cpu_inc(*field);
}
EXPORT_SYMBOL_GPL(netdev_core_stats_inc);
@@ -11099,6 +11142,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
#ifdef CONFIG_NET_SCHED
hash_init(dev->qdisc_hash);
#endif
+
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
@@ -11120,7 +11164,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (!dev->ethtool)
goto free_all;
- strcpy(dev->name, name);
+ strscpy(dev->name, name);
dev->name_assign_type = name_assign_type;
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
@@ -11191,6 +11235,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
+ netdev_free_phy_link_topology(dev);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_DUMMY) {
@@ -11343,6 +11389,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);
+ dev_dmabuf_uninstall(dev);
netdev_offload_xstats_disable_all(dev);
@@ -11407,7 +11454,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
* @head: list of devices
*
* Note: As most callers use a stack allocated list_head,
- * we force a list_del() to make sure stack wont be corrupted later.
+ * we force a list_del() to make sure stack won't be corrupted later.
*/
void unregister_netdevice_many(struct list_head *head)
{
@@ -11462,10 +11509,10 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
/* Don't allow namespace local devices to be moved. */
err = -EINVAL;
- if (dev->features & NETIF_F_NETNS_LOCAL)
+ if (dev->netns_local)
goto out;
- /* Ensure the device has been registrered */
+ /* Ensure the device has been registered */
if (dev->reg_state != NETREG_REGISTERED)
goto out;
@@ -11844,7 +11891,7 @@ static void __net_exit default_device_exit_net(struct net *net)
char fb_name[IFNAMSIZ];
/* Ignore unmoveable devices (i.e. loopback) */
- if (dev->features & NETIF_F_NETNS_LOCAL)
+ if (dev->netns_local)
continue;
/* Leave virtual devices for the generic cleanup */
@@ -11905,7 +11952,7 @@ static struct pernet_operations __net_initdata default_device_ops = {
static void __init net_dev_struct_check(void)
{
/* TX read-mostly hotpath */
- CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags_fast);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index baa63dee2829..166e404f7c03 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -262,7 +262,7 @@ static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
}
/* This function only works where there is a strict 1-1 relationship
- * between source and destionation of they synch. If you ever need to
+ * between source and destination of they synch. If you ever need to
* sync addresses to more then 1 destination, you need to use
* __hw_addr_sync_multiple().
*/
@@ -299,8 +299,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
EXPORT_SYMBOL(__hw_addr_unsync);
/**
- * __hw_addr_sync_dev - Synchonize device's multicast list
- * @list: address list to syncronize
+ * __hw_addr_sync_dev - Synchronize device's multicast list
+ * @list: address list to synchronize
* @dev: device to sync
* @sync: function to call if address should be added
* @unsync: function to call if address should be removed
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 8592c052c0f4..473c437b6b53 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -317,8 +317,7 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
* should take precedence in front of hardware timestamping provided by the
* netdev. If the netdev driver needs to perform specific actions even for PHY
* timestamping to work properly (a switch port must trap the timestamped
- * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
- * dev->priv_flags.
+ * frames and not forward them), it must set dev->see_all_hwtstamp_requests.
*/
int dev_set_hwtstamp_phylib(struct net_device *dev,
struct kernel_hwtstamp_config *cfg,
@@ -332,13 +331,13 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV;
- if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ if (phy_ts && dev->see_all_hwtstamp_requests) {
err = ops->ndo_hwtstamp_get(dev, &old_cfg);
if (err)
return err;
}
- if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) {
+ if (!phy_ts || dev->see_all_hwtstamp_requests) {
err = ops->ndo_hwtstamp_set(dev, cfg, extack);
if (err) {
if (extack->_msg)
@@ -347,7 +346,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev,
}
}
- if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS))
+ if (phy_ts && dev->see_all_hwtstamp_requests)
changed = kernel_hwtstamp_config_changed(&old_cfg, cfg);
if (phy_ts) {
diff --git a/net/core/devmem.c b/net/core/devmem.c
new file mode 100644
index 000000000000..11b91c12ee11
--- /dev/null
+++ b/net/core/devmem.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Devmem TCP
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ * Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+ * Kaiyuan Zhang <kaiyuanz@google.com>
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/genalloc.h>
+#include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+#include <net/page_pool/helpers.h>
+#include <trace/events/page_pool.h>
+
+#include "devmem.h"
+#include "mp_dmabuf_devmem.h"
+#include "page_pool_priv.h"
+
+/* Device memory support */
+
+/* Protected by rtnl_lock() */
+static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
+
+static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
+ struct gen_pool_chunk *chunk,
+ void *not_used)
+{
+ struct dmabuf_genpool_chunk_owner *owner = chunk->owner;
+
+ kvfree(owner->niovs);
+ kfree(owner);
+}
+
+static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
+{
+ struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+
+ return owner->base_dma_addr +
+ ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
+}
+
+void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
+{
+ size_t size, avail;
+
+ gen_pool_for_each_chunk(binding->chunk_pool,
+ net_devmem_dmabuf_free_chunk_owner, NULL);
+
+ size = gen_pool_size(binding->chunk_pool);
+ avail = gen_pool_avail(binding->chunk_pool);
+
+ if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
+ size, avail))
+ gen_pool_destroy(binding->chunk_pool);
+
+ dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
+ DMA_FROM_DEVICE);
+ dma_buf_detach(binding->dmabuf, binding->attachment);
+ dma_buf_put(binding->dmabuf);
+ xa_destroy(&binding->bound_rxqs);
+ kfree(binding);
+}
+
+struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ struct dmabuf_genpool_chunk_owner *owner;
+ unsigned long dma_addr;
+ struct net_iov *niov;
+ ssize_t offset;
+ ssize_t index;
+
+ dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
+ (void **)&owner);
+ if (!dma_addr)
+ return NULL;
+
+ offset = dma_addr - owner->base_dma_addr;
+ index = offset / PAGE_SIZE;
+ niov = &owner->niovs[index];
+
+ niov->pp_magic = 0;
+ niov->pp = NULL;
+ atomic_long_set(&niov->pp_ref_count, 0);
+
+ return niov;
+}
+
+void net_devmem_free_dmabuf(struct net_iov *niov)
+{
+ struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
+ unsigned long dma_addr = net_devmem_get_dma_addr(niov);
+
+ if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
+ PAGE_SIZE)))
+ return;
+
+ gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
+}
+
+void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ struct netdev_rx_queue *rxq;
+ unsigned long xa_idx;
+ unsigned int rxq_idx;
+
+ if (binding->list.next)
+ list_del(&binding->list);
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
+ WARN_ON(rxq->mp_params.mp_priv != binding);
+
+ rxq->mp_params.mp_priv = NULL;
+
+ rxq_idx = get_netdev_rx_queue_index(rxq);
+
+ WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
+ }
+
+ xa_erase(&net_devmem_dmabuf_bindings, binding->id);
+
+ net_devmem_dmabuf_binding_put(binding);
+}
+
+int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_rx_queue *rxq;
+ u32 xa_idx;
+ int err;
+
+ if (rxq_idx >= dev->real_num_rx_queues) {
+ NL_SET_ERR_MSG(extack, "rx queue index out of range");
+ return -ERANGE;
+ }
+
+ rxq = __netif_get_rx_queue(dev, rxq_idx);
+ if (rxq->mp_params.mp_priv) {
+ NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
+ return -EEXIST;
+ }
+
+#ifdef CONFIG_XDP_SOCKETS
+ if (rxq->pool) {
+ NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
+ return -EBUSY;
+ }
+#endif
+
+ err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
+ GFP_KERNEL);
+ if (err)
+ return err;
+
+ rxq->mp_params.mp_priv = binding;
+
+ err = netdev_rx_queue_restart(dev, rxq_idx);
+ if (err)
+ goto err_xa_erase;
+
+ return 0;
+
+err_xa_erase:
+ rxq->mp_params.mp_priv = NULL;
+ xa_erase(&binding->bound_rxqs, xa_idx);
+
+ return err;
+}
+
+struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ static u32 id_alloc_next;
+ struct scatterlist *sg;
+ struct dma_buf *dmabuf;
+ unsigned int sg_idx, i;
+ unsigned long virtual;
+ int err;
+
+ dmabuf = dma_buf_get(dmabuf_fd);
+ if (IS_ERR(dmabuf))
+ return ERR_CAST(dmabuf);
+
+ binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
+ dev_to_node(&dev->dev));
+ if (!binding) {
+ err = -ENOMEM;
+ goto err_put_dmabuf;
+ }
+
+ binding->dev = dev;
+
+ err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
+ binding, xa_limit_32b, &id_alloc_next,
+ GFP_KERNEL);
+ if (err < 0)
+ goto err_free_binding;
+
+ xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);
+
+ refcount_set(&binding->ref, 1);
+
+ binding->dmabuf = dmabuf;
+
+ binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
+ if (IS_ERR(binding->attachment)) {
+ err = PTR_ERR(binding->attachment);
+ NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
+ goto err_free_id;
+ }
+
+ binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
+ DMA_FROM_DEVICE);
+ if (IS_ERR(binding->sgt)) {
+ err = PTR_ERR(binding->sgt);
+ NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
+ goto err_detach;
+ }
+
+ /* For simplicity we expect to make PAGE_SIZE allocations, but the
+ * binding can be much more flexible than that. We may be able to
+ * allocate MTU sized chunks here. Leave that for future work...
+ */
+ binding->chunk_pool =
+ gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
+ if (!binding->chunk_pool) {
+ err = -ENOMEM;
+ goto err_unmap;
+ }
+
+ virtual = 0;
+ for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
+ dma_addr_t dma_addr = sg_dma_address(sg);
+ struct dmabuf_genpool_chunk_owner *owner;
+ size_t len = sg_dma_len(sg);
+ struct net_iov *niov;
+
+ owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
+ dev_to_node(&dev->dev));
+ if (!owner) {
+ err = -ENOMEM;
+ goto err_free_chunks;
+ }
+
+ owner->base_virtual = virtual;
+ owner->base_dma_addr = dma_addr;
+ owner->num_niovs = len / PAGE_SIZE;
+ owner->binding = binding;
+
+ err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
+ dma_addr, len, dev_to_node(&dev->dev),
+ owner);
+ if (err) {
+ kfree(owner);
+ err = -EINVAL;
+ goto err_free_chunks;
+ }
+
+ owner->niovs = kvmalloc_array(owner->num_niovs,
+ sizeof(*owner->niovs),
+ GFP_KERNEL);
+ if (!owner->niovs) {
+ err = -ENOMEM;
+ goto err_free_chunks;
+ }
+
+ for (i = 0; i < owner->num_niovs; i++) {
+ niov = &owner->niovs[i];
+ niov->owner = owner;
+ page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
+ net_devmem_get_dma_addr(niov));
+ }
+
+ virtual += len;
+ }
+
+ return binding;
+
+err_free_chunks:
+ gen_pool_for_each_chunk(binding->chunk_pool,
+ net_devmem_dmabuf_free_chunk_owner, NULL);
+ gen_pool_destroy(binding->chunk_pool);
+err_unmap:
+ dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
+ DMA_FROM_DEVICE);
+err_detach:
+ dma_buf_detach(dmabuf, binding->attachment);
+err_free_id:
+ xa_erase(&net_devmem_dmabuf_bindings, binding->id);
+err_free_binding:
+ kfree(binding);
+err_put_dmabuf:
+ dma_buf_put(dmabuf);
+ return ERR_PTR(err);
+}
+
+void dev_dmabuf_uninstall(struct net_device *dev)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ struct netdev_rx_queue *rxq;
+ unsigned long xa_idx;
+ unsigned int i;
+
+ for (i = 0; i < dev->real_num_rx_queues; i++) {
+ binding = dev->_rx[i].mp_params.mp_priv;
+ if (!binding)
+ continue;
+
+ xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
+ if (rxq == &dev->_rx[i]) {
+ xa_erase(&binding->bound_rxqs, xa_idx);
+ break;
+ }
+ }
+}
+
+/*** "Dmabuf devmem memory provider" ***/
+
+int mp_dmabuf_devmem_init(struct page_pool *pool)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+
+ if (!binding)
+ return -EINVAL;
+
+ if (!pool->dma_map)
+ return -EOPNOTSUPP;
+
+ if (pool->dma_sync)
+ return -EOPNOTSUPP;
+
+ if (pool->p.order != 0)
+ return -E2BIG;
+
+ net_devmem_dmabuf_binding_get(binding);
+ return 0;
+}
+
+netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+ struct net_iov *niov;
+ netmem_ref netmem;
+
+ niov = net_devmem_alloc_dmabuf(binding);
+ if (!niov)
+ return 0;
+
+ netmem = net_iov_to_netmem(niov);
+
+ page_pool_set_pp_info(pool, netmem);
+
+ pool->pages_state_hold_cnt++;
+ trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
+ return netmem;
+}
+
+void mp_dmabuf_devmem_destroy(struct page_pool *pool)
+{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
+
+ net_devmem_dmabuf_binding_put(binding);
+}
+
+bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
+{
+ long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));
+
+ if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+ return false;
+
+ if (WARN_ON_ONCE(refcount != 1))
+ return false;
+
+ page_pool_clear_pp_info(netmem);
+
+ net_devmem_free_dmabuf(netmem_to_net_iov(netmem));
+
+ /* We don't want the page pool put_page()ing our net_iovs. */
+ return false;
+}
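The index math in net_devmem_alloc_dmabuf() and net_devmem_get_dma_addr() above is a page-granular offset into the chunk owner's region. A standalone check of the arithmetic, with invented addresses and PAGE_SHIFT assumed to be 12:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)

int main(void)
{
        uint64_t base_dma_addr = 0x80000000;    /* owner->base_dma_addr */
        uint64_t dma_addr = 0x80003000;         /* gen_pool_alloc_owner() result */
        uint64_t idx = (dma_addr - base_dma_addr) / PAGE_SIZE;

        assert(idx == 3);                       /* niov = &owner->niovs[3] */

        /* net_devmem_get_dma_addr() inverts the mapping on free: */
        assert(base_dma_addr + (idx << PAGE_SHIFT) == dma_addr);
        return 0;
}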
diff --git a/net/core/devmem.h b/net/core/devmem.h
new file mode 100644
index 000000000000..76099ef9c482
--- /dev/null
+++ b/net/core/devmem.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Device memory TCP support
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ * Willem de Bruijn <willemb@google.com>
+ * Kaiyuan Zhang <kaiyuanz@google.com>
+ *
+ */
+#ifndef _NET_DEVMEM_H
+#define _NET_DEVMEM_H
+
+struct netlink_ext_ack;
+
+struct net_devmem_dmabuf_binding {
+ struct dma_buf *dmabuf;
+ struct dma_buf_attachment *attachment;
+ struct sg_table *sgt;
+ struct net_device *dev;
+ struct gen_pool *chunk_pool;
+
+ /* The user holds a ref (via the netlink API) for as long as they want
+ * the binding to remain alive. Each page pool using this binding holds
+ * a ref to keep the binding alive. Each allocated net_iov holds a
+ * ref.
+ *
+ * The binding undoes itself and unmaps the underlying dmabuf once all
+ * those refs are dropped and the binding is no longer desired or in
+ * use.
+ */
+ refcount_t ref;
+
+ /* The list of bindings currently active. Used for netlink to notify us
+ * of the user dropping the bind.
+ */
+ struct list_head list;
+
+ /* rxq's this binding is active on. */
+ struct xarray bound_rxqs;
+
+ /* ID of this binding. Globally unique to all bindings currently
+ * active.
+ */
+ u32 id;
+};
+
+#if defined(CONFIG_NET_DEVMEM)
+/* Owner of the dma-buf chunks inserted into the gen pool. Each scatterlist
+ * entry from the dmabuf is inserted into the genpool as a chunk, and needs
+ * this owner struct to keep track of some metadata necessary to create
+ * allocations from this chunk.
+ */
+struct dmabuf_genpool_chunk_owner {
+ /* Offset into the dma-buf where this chunk starts. */
+ unsigned long base_virtual;
+
+ /* dma_addr of the start of the chunk. */
+ dma_addr_t base_dma_addr;
+
+ /* Array of net_iovs for this chunk. */
+ struct net_iov *niovs;
+ size_t num_niovs;
+
+ struct net_devmem_dmabuf_binding *binding;
+};
+
+void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding);
+struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack);
+void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding);
+int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack);
+void dev_dmabuf_uninstall(struct net_device *dev);
+
+static inline struct dmabuf_genpool_chunk_owner *
+net_iov_owner(const struct net_iov *niov)
+{
+ return niov->owner;
+}
+
+static inline unsigned int net_iov_idx(const struct net_iov *niov)
+{
+ return niov - net_iov_owner(niov)->niovs;
+}
+
+static inline struct net_devmem_dmabuf_binding *
+net_iov_binding(const struct net_iov *niov)
+{
+ return net_iov_owner(niov)->binding;
+}
+
+static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
+{
+ struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);
+
+ return owner->base_virtual +
+ ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT);
+}
+
+static inline u32 net_iov_binding_id(const struct net_iov *niov)
+{
+ return net_iov_owner(niov)->binding->id;
+}
+
+static inline void
+net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding)
+{
+ refcount_inc(&binding->ref);
+}
+
+static inline void
+net_devmem_dmabuf_binding_put(struct net_devmem_dmabuf_binding *binding)
+{
+ if (!refcount_dec_and_test(&binding->ref))
+ return;
+
+ __net_devmem_dmabuf_binding_free(binding);
+}
+
+struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
+void net_devmem_free_dmabuf(struct net_iov *ppiov);
+
+#else
+struct net_devmem_dmabuf_binding;
+
+static inline void
+__net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
+{
+}
+
+static inline struct net_devmem_dmabuf_binding *
+net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
+ struct netlink_ext_ack *extack)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void
+net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+}
+
+static inline int
+net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
+ struct net_devmem_dmabuf_binding *binding,
+ struct netlink_ext_ack *extack)
+
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void dev_dmabuf_uninstall(struct net_device *dev)
+{
+}
+
+static inline struct net_iov *
+net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
+{
+ return NULL;
+}
+
+static inline void net_devmem_free_dmabuf(struct net_iov *ppiov)
+{
+}
+
+static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov)
+{
+ return 0;
+}
+
+static inline u32 net_iov_binding_id(const struct net_iov *niov)
+{
+ return 0;
+}
+#endif
+
+#endif /* _NET_DEVMEM_H */
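
The header above encodes the binding's whole lifetime in its refcount: one reference from the netlink user, one per page pool using the binding, and one per allocated net_iov. A minimal sketch of how a pool-side consumer is expected to pair the helpers declared above; this mirrors what mp_dmabuf_devmem_init()/mp_dmabuf_devmem_destroy() do, and the example_* names are illustrative only:

/* Illustrative pairing of the refcount helpers; assumes pool->mp_priv
 * was pointed at a struct net_devmem_dmabuf_binding before pool init.
 */
static int example_pool_attach_binding(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	if (!binding)
		return -EINVAL;

	/* The pool now keeps the binding (and its dmabuf mapping) alive. */
	net_devmem_dmabuf_binding_get(binding);
	return 0;
}

static void example_pool_detach_binding(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	/* Drops the pool's reference; the last put unmaps the dmabuf via
	 * __net_devmem_dmabuf_binding_free().
	 */
	net_devmem_dmabuf_binding_put(binding);
}
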
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 6ebffbc63236..154a2681f55c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -11,6 +11,7 @@
#include <linux/list.h>
#include <linux/module.h>
#include <net/net_namespace.h>
+#include <net/inet_dscp.h>
#include <net/sock.h>
#include <net/fib_rules.h>
#include <net/ip_tunnels.h>
@@ -72,7 +73,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->suppress_prefixlen = -1;
r->suppress_ifgroup = -1;
- /* The lock is not required here, the list in unreacheable
+ /* The lock is not required here, the list is unreachable
* at the moment this function is called */
list_add_tail(&r->list, &ops->rules_list);
return 0;
@@ -766,7 +767,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = {
[FRA_PROTOCOL] = { .type = NLA_U8 },
[FRA_IP_PROTO] = { .type = NLA_U8 },
[FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
- [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }
+ [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) },
+ [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2),
};
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1205,8 +1207,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, ops->nlgroup, err);
+ rtnl_set_sk_err(net, ops->nlgroup, err);
}
static void attach_rules(struct list_head *rules, struct net_device *dev)
diff --git a/net/core/filter.c b/net/core/filter.c
index f3c72cf86099..0f4d9f3b206e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -84,6 +84,7 @@
#include <net/netkit.h>
#include <linux/un.h>
#include <net/xdp_sock_drv.h>
+#include <net/inet_dscp.h>
#include "dev.h"
@@ -2371,7 +2372,7 @@ static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4 = {
.flowi4_flags = FLOWI_FLAG_ANYSRC,
.flowi4_mark = skb->mark,
- .flowi4_tos = RT_TOS(ip4h->tos),
+ .flowi4_tos = ip4h->tos & INET_DSCP_MASK,
.flowi4_oif = dev->ifindex,
.flowi4_proto = ip4h->protocol,
.daddr = ip4h->daddr,
@@ -3189,6 +3190,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
bpf_push_mac_rcsum(skb);
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
bpf_pull_mac_rcsum(skb);
+ skb_reset_mac_len(skb);
bpf_compute_data_pointers(skb);
return ret;
@@ -5278,6 +5280,11 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
return -EINVAL;
inet_csk(sk)->icsk_rto_min = timeout;
break;
+ case TCP_BPF_SOCK_OPS_CB_FLAGS:
+ if (val & ~(BPF_SOCK_OPS_ALL_CB_FLAGS))
+ return -EINVAL;
+ tp->bpf_sock_ops_cb_flags = val;
+ break;
default:
return -EINVAL;
}
@@ -5366,6 +5373,17 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
if (*optlen < 1)
return -EINVAL;
break;
+ case TCP_BPF_SOCK_OPS_CB_FLAGS:
+ if (*optlen != sizeof(int))
+ return -EINVAL;
+ if (getopt) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ int cb_flags = tp->bpf_sock_ops_cb_flags;
+
+ memcpy(optval, &cb_flags, *optlen);
+ return 0;
+ }
+ return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
default:
if (getopt)
return -EINVAL;
@@ -5899,7 +5917,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl4.flowi4_iif = params->ifindex;
fl4.flowi4_oif = 0;
}
- fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
+ fl4.flowi4_tos = params->tos & INET_DSCP_MASK;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
@@ -12045,7 +12063,7 @@ int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
}
BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
-BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
@@ -12094,6 +12112,7 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);
diff --git a/net/core/gro.c b/net/core/gro.c
index b3b43de1a650..802b4a062400 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -374,7 +374,7 @@ static void gro_list_prepare(const struct list_head *head,
skb_mac_header(skb),
maclen);
- /* in most common scenarions 'slow_gro' is 0
+ /* in most common scenarios 'slow_gro' is 0
* otherwise we are already on some slower paths
* either skip all the infrequent tests altogether or
* avoid trying too hard to skip each of them individually
@@ -408,7 +408,8 @@ static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
pinfo = skb_shinfo(skb);
frag0 = &pinfo->frags[0];
- if (pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) &&
+ if (pinfo->nr_frags && skb_frag_page(frag0) &&
+ !PageHighMem(skb_frag_page(frag0)) &&
(!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index afb05f58b64c..1a14f915b7a4 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -12,6 +12,7 @@
#include <net/gre.h>
#include <net/ip6_route.h>
#include <net/ipv6_stubs.h>
+#include <net/inet_dscp.h>
struct bpf_lwt_prog {
struct bpf_prog *prog;
@@ -205,7 +206,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
fl4.flowi4_oif = oif;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_uid = sock_net_uid(net, sk);
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
fl4.flowi4_proto = iph->protocol;
fl4.daddr = iph->daddr;
diff --git a/net/core/mp_dmabuf_devmem.h b/net/core/mp_dmabuf_devmem.h
new file mode 100644
index 000000000000..67cd0dd7319c
--- /dev/null
+++ b/net/core/mp_dmabuf_devmem.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Dmabuf device memory provider.
+ *
+ * Authors: Mina Almasry <almasrymina@google.com>
+ *
+ */
+#ifndef _NET_MP_DMABUF_DEVMEM_H
+#define _NET_MP_DMABUF_DEVMEM_H
+
+#include <net/netmem.h>
+
+#if defined(CONFIG_NET_DEVMEM)
+int mp_dmabuf_devmem_init(struct page_pool *pool);
+
+netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp);
+
+void mp_dmabuf_devmem_destroy(struct page_pool *pool);
+
+bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem);
+#else
+static inline int mp_dmabuf_devmem_init(struct page_pool *pool)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline netmem_ref
+mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
+{
+ return 0;
+}
+
+static inline void mp_dmabuf_devmem_destroy(struct page_pool *pool)
+{
+}
+
+static inline bool
+mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
+{
+ return false;
+}
+#endif
+
+#endif /* _NET_MP_DMABUF_DEVMEM_H */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a6fe88eca939..77b819cd995b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3530,8 +3530,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
void neigh_app_ns(struct neighbour *n)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 291fdf4a328b..05cf5347f25e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -32,6 +32,7 @@
#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
+static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
@@ -425,6 +426,9 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
+ if (val > S32_MAX)
+ return -ERANGE;
+
WRITE_ONCE(dev->napi_defer_hard_irqs, val);
return 0;
}
@@ -438,7 +442,7 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev,
return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
}
-NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);
+NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);
static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
@@ -1056,7 +1060,7 @@ static const void *rx_queue_namespace(const struct kobject *kobj)
struct device *dev = &queue->dev->dev;
const void *ns = NULL;
- if (dev->class && dev->class->ns_type)
+ if (dev->class && dev->class->namespace)
ns = dev->class->namespace(dev);
return ns;
@@ -1740,7 +1744,7 @@ static const void *netdev_queue_namespace(const struct kobject *kobj)
struct device *dev = &queue->dev->dev;
const void *ns = NULL;
- if (dev->class && dev->class->ns_type)
+ if (dev->class && dev->class->namespace)
ns = dev->class->namespace(dev);
return ns;
@@ -1764,8 +1768,7 @@ static const struct kobj_type netdev_queue_ktype = {
static bool netdev_uses_bql(const struct net_device *dev)
{
- if (dev->features & NETIF_F_LLTX ||
- dev->priv_flags & IFF_NO_QUEUE)
+ if (dev->lltx || (dev->priv_flags & IFF_NO_QUEUE))
return false;
return IS_ENABLED(CONFIG_BQL);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6a823ba906c6..11e4dd4f09ed 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -125,7 +125,7 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
int err = -ENOMEM;
void *data = NULL;
- if (ops->id && ops->size) {
+ if (ops->id) {
data = kzalloc(ops->size, GFP_KERNEL);
if (!data)
goto out;
@@ -140,7 +140,7 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
if (!err)
return 0;
- if (ops->id && ops->size) {
+ if (ops->id) {
ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&pernet_ops_rwsem));
ng->ptr[*ops->id] = NULL;
@@ -182,7 +182,8 @@ static void ops_free_list(const struct pernet_operations *ops,
struct list_head *net_exit_list)
{
struct net *net;
- if (ops->size && ops->id) {
+
+ if (ops->id) {
list_for_each_entry(net, net_exit_list, exit_list)
kfree(net_generic(net, *ops->id));
}
@@ -308,16 +309,38 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
+static __net_init void preinit_net_sysctl(struct net *net)
+{
+ net->core.sysctl_somaxconn = SOMAXCONN;
+ /* Limits per socket sk_omem_alloc usage.
+ * TCP zerocopy regular usage needs 128 KB.
+ */
+ net->core.sysctl_optmem_max = 128 * 1024;
+ net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
+}
+
/* init code that must occur even if setup_net() is not called. */
-static __net_init void preinit_net(struct net *net)
+static __net_init void preinit_net(struct net *net, struct user_namespace *user_ns)
{
+ refcount_set(&net->passive, 1);
+ refcount_set(&net->ns.count, 1);
+ ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
+
+ get_random_bytes(&net->hash_mix, sizeof(u32));
+ net->dev_base_seq = 1;
+ net->user_ns = user_ns;
+
+ idr_init(&net->netns_ids);
+ spin_lock_init(&net->nsid_lock);
+ mutex_init(&net->ipv4.ra_mutex);
+ preinit_net_sysctl(net);
}
/*
* setup_net runs the initializers for the network namespace object.
*/
-static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
+static __net_init int setup_net(struct net *net)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
@@ -325,19 +348,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
LIST_HEAD(dev_kill_list);
int error = 0;
- refcount_set(&net->ns.count, 1);
- ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
-
- refcount_set(&net->passive, 1);
- get_random_bytes(&net->hash_mix, sizeof(u32));
preempt_disable();
net->net_cookie = gen_cookie_next(&net_cookie);
preempt_enable();
- net->dev_base_seq = 1;
- net->user_ns = user_ns;
- idr_init(&net->netns_ids);
- spin_lock_init(&net->nsid_lock);
- mutex_init(&net->ipv4.ra_mutex);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -382,32 +395,6 @@ out_undo:
goto out;
}
-static int __net_init net_defaults_init_net(struct net *net)
-{
- net->core.sysctl_somaxconn = SOMAXCONN;
- /* Limits per socket sk_omem_alloc usage.
- * TCP zerocopy regular usage needs 128 KB.
- */
- net->core.sysctl_optmem_max = 128 * 1024;
- net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
-
- return 0;
-}
-
-static struct pernet_operations net_defaults_ops = {
- .init = net_defaults_init_net,
-};
-
-static __init int net_defaults_init(void)
-{
- if (register_pernet_subsys(&net_defaults_ops))
- panic("Cannot initialize net default settings");
-
- return 0;
-}
-
-core_initcall(net_defaults_init);
-
#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
@@ -496,8 +483,7 @@ struct net *copy_net_ns(unsigned long flags,
goto dec_ucounts;
}
- preinit_net(net);
- refcount_set(&net->passive, 1);
+ preinit_net(net, user_ns);
net->ucounts = ucounts;
get_user_ns(user_ns);
@@ -505,7 +491,7 @@ struct net *copy_net_ns(unsigned long flags,
if (rv < 0)
goto put_userns;
- rv = setup_net(net, user_ns);
+ rv = setup_net(net);
up_read(&pernet_ops_rwsem);
@@ -1199,9 +1185,10 @@ void __init net_ns_init(void)
#ifdef CONFIG_KEYS
init_net.key_domain = &init_net_key_domain;
#endif
+ preinit_net(&init_net, &init_user_ns);
+
down_write(&pernet_ops_rwsem);
- preinit_net(&init_net);
- if (setup_net(&init_net, &init_user_ns))
+ if (setup_net(&init_net))
panic("Could not setup the initial network namespace");
init_net_initialized = true;
@@ -1244,7 +1231,7 @@ static int __register_pernet_operations(struct list_head *list,
LIST_HEAD(net_exit_list);
list_add_tail(&ops->list, list);
- if (ops->init || (ops->id && ops->size)) {
+ if (ops->init || ops->id) {
/* We held write locked pernet_ops_rwsem, and parallel
* setup_net() and cleanup_net() are not possible.
*/
@@ -1310,6 +1297,9 @@ static int register_pernet_operations(struct list_head *list,
{
int error;
+ if (WARN_ON(!!ops->id ^ !!ops->size))
+ return -EINVAL;
+
if (ops->id) {
error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
GFP_KERNEL);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 8350a0afa9ec..b28424ae06d5 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -9,6 +9,7 @@
#include "netdev-genl-gen.h"
#include <uapi/linux/netdev.h>
+#include <linux/list.h>
/* Integer value ranges */
static const struct netlink_range_validation netdev_a_page_pool_id_range = {
@@ -27,6 +28,11 @@ const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFIND
[NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
};
+const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = {
+ [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, },
+ [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
+};
+
/* NETDEV_CMD_DEV_GET - do */
static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
[NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
@@ -74,6 +80,13 @@ static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE
[NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1),
};
+/* NETDEV_CMD_BIND_RX - do */
+static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] = {
+ [NETDEV_A_DMABUF_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+ [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
+ [NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -151,6 +164,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_QSTATS_SCOPE,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = NETDEV_CMD_BIND_RX,
+ .doit = netdev_nl_bind_rx_doit,
+ .policy = netdev_bind_rx_nl_policy,
+ .maxattr = NETDEV_A_DMABUF_FD,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
@@ -168,4 +188,7 @@ struct genl_family netdev_nl_family __ro_after_init = {
.n_split_ops = ARRAY_SIZE(netdev_nl_ops),
.mcgrps = netdev_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps),
+ .sock_priv_size = sizeof(struct list_head),
+ .sock_priv_init = (void *)netdev_nl_sock_priv_init,
+ .sock_priv_destroy = (void *)netdev_nl_sock_priv_destroy,
};
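
For reference, the request this policy validates can be built from userspace with plain genetlink. A hedged sketch using libmnl follows; it assumes the "netdev" family id has already been resolved via CTRL_CMD_GETFAMILY, and that one NETDEV_A_DMABUF_QUEUES nest is emitted per queue, matching the nla_for_each_attr_type() walk in netdev_nl_bind_rx_doit() below. Error handling and the reply (which carries NETDEV_A_DMABUF_ID) are elided.

#include <libmnl/libmnl.h>
#include <linux/genetlink.h>
#include <linux/netdev.h>

/* Sketch only: bind dmabuf_fd to one rx queue of ifindex. */
static int bind_rx(struct mnl_socket *nl, uint16_t family_id,
		   uint32_t ifindex, uint32_t dmabuf_fd, uint32_t rxq)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct genlmsghdr *genl;
	struct nlattr *queue;

	nlh->nlmsg_type = family_id;	/* resolved "netdev" family */
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;

	genl = mnl_nlmsg_put_extra_header(nlh, sizeof(*genl));
	genl->cmd = NETDEV_CMD_BIND_RX;
	genl->version = 1;

	mnl_attr_put_u32(nlh, NETDEV_A_DMABUF_IFINDEX, ifindex);
	mnl_attr_put_u32(nlh, NETDEV_A_DMABUF_FD, dmabuf_fd);

	/* One nest per queue; repeat this block to bind several queues. */
	queue = mnl_attr_nest_start(nlh, NETDEV_A_DMABUF_QUEUES);
	mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_ID, rxq);
	mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_TYPE, NETDEV_QUEUE_TYPE_RX);
	mnl_attr_nest_end(nlh, queue);

	return mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0 ? -1 : 0;
}
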
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 4db40fd5b4a9..8cda334fd042 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -10,9 +10,11 @@
#include <net/genetlink.h>
#include <uapi/linux/netdev.h>
+#include <linux/list.h>
/* Common nested types */
extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
+extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1];
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
@@ -30,6 +32,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
enum {
NETDEV_NLGRP_MGMT,
@@ -38,4 +41,7 @@ enum {
extern struct genl_family netdev_nl_family;
+void netdev_nl_sock_priv_init(struct list_head *priv);
+void netdev_nl_sock_priv_destroy(struct list_head *priv);
+
#endif /* _LINUX_NETDEV_GEN_H */
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 05f9515d2c05..1cb954f2d39e 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -3,16 +3,17 @@
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
+#include <net/busy_poll.h>
#include <net/net_namespace.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <net/xdp_sock.h>
-#include <net/netdev_rx_queue.h>
-#include <net/netdev_queues.h>
-#include <net/busy_poll.h>
-#include "netdev-genl-gen.h"
#include "dev.h"
+#include "devmem.h"
+#include "netdev-genl-gen.h"
struct netdev_nl_dump_ctx {
unsigned long ifindex;
@@ -216,10 +217,12 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
napi = napi_by_id(napi_id);
- if (napi)
+ if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
- else
- err = -EINVAL;
+ } else {
+ NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_NAPI_ID]);
+ err = -ENOENT;
+ }
rtnl_unlock();
@@ -292,6 +295,7 @@ static int
netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
u32 q_idx, u32 q_type, const struct genl_info *info)
{
+ struct net_devmem_dmabuf_binding *binding;
struct netdev_rx_queue *rxq;
struct netdev_queue *txq;
void *hdr;
@@ -311,6 +315,12 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
rxq->napi->napi_id))
goto nla_put_failure;
+
+ binding = rxq->mp_params.mp_priv;
+ if (binding &&
+ nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id))
+ goto nla_put_failure;
+
break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
@@ -721,6 +731,129 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
return err;
}
+int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[ARRAY_SIZE(netdev_queue_id_nl_policy)];
+ struct net_devmem_dmabuf_binding *binding;
+ struct list_head *sock_binding_list;
+ u32 ifindex, dmabuf_fd, rxq_idx;
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ struct nlattr *attr;
+ int rem, err = 0;
+ void *hdr;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_FD) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_DMABUF_QUEUES))
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
+ dmabuf_fd = nla_get_u32(info->attrs[NETDEV_A_DMABUF_FD]);
+
+ sock_binding_list = genl_sk_priv_get(&netdev_nl_family,
+ NETLINK_CB(skb).sk);
+ if (IS_ERR(sock_binding_list))
+ return PTR_ERR(sock_binding_list);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_genlmsg_free;
+ }
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (!netdev || !netif_device_present(netdev)) {
+ err = -ENODEV;
+ goto err_unlock;
+ }
+
+ if (dev_xdp_prog_count(netdev)) {
+ NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached");
+ err = -EEXIST;
+ goto err_unlock;
+ }
+
+ binding = net_devmem_bind_dmabuf(netdev, dmabuf_fd, info->extack);
+ if (IS_ERR(binding)) {
+ err = PTR_ERR(binding);
+ goto err_unlock;
+ }
+
+ nla_for_each_attr_type(attr, NETDEV_A_DMABUF_QUEUES,
+ genlmsg_data(info->genlhdr),
+ genlmsg_len(info->genlhdr), rem) {
+ err = nla_parse_nested(
+ tb, ARRAY_SIZE(netdev_queue_id_nl_policy) - 1, attr,
+ netdev_queue_id_nl_policy, info->extack);
+ if (err < 0)
+ goto err_unbind;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_ID) ||
+ NL_REQ_ATTR_CHECK(info->extack, attr, tb, NETDEV_A_QUEUE_TYPE)) {
+ err = -EINVAL;
+ goto err_unbind;
+ }
+
+ if (nla_get_u32(tb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) {
+ NL_SET_BAD_ATTR(info->extack, tb[NETDEV_A_QUEUE_TYPE]);
+ err = -EINVAL;
+ goto err_unbind;
+ }
+
+ rxq_idx = nla_get_u32(tb[NETDEV_A_QUEUE_ID]);
+
+ err = net_devmem_bind_dmabuf_to_queue(netdev, rxq_idx, binding,
+ info->extack);
+ if (err)
+ goto err_unbind;
+ }
+
+ list_add(&binding->list, sock_binding_list);
+
+ nla_put_u32(rsp, NETDEV_A_DMABUF_ID, binding->id);
+ genlmsg_end(rsp, hdr);
+
+ err = genlmsg_reply(rsp, info);
+ if (err)
+ goto err_unbind;
+
+ rtnl_unlock();
+
+ return 0;
+
+err_unbind:
+ net_devmem_unbind_dmabuf(binding);
+err_unlock:
+ rtnl_unlock();
+err_genlmsg_free:
+ nlmsg_free(rsp);
+ return err;
+}
+
+void netdev_nl_sock_priv_init(struct list_head *priv)
+{
+ INIT_LIST_HEAD(priv);
+}
+
+void netdev_nl_sock_priv_destroy(struct list_head *priv)
+{
+ struct net_devmem_dmabuf_binding *binding;
+ struct net_devmem_dmabuf_binding *temp;
+
+ list_for_each_entry_safe(binding, temp, priv, list) {
+ rtnl_lock();
+ net_devmem_unbind_dmabuf(binding);
+ rtnl_unlock();
+ }
+}
+
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
new file mode 100644
index 000000000000..e217a5838c87
--- /dev/null
+++ b/net/core/netdev_rx_queue.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/netdevice.h>
+#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+
+#include "page_pool_priv.h"
+
+int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
+{
+ struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, rxq_idx);
+ void *new_mem, *old_mem;
+ int err;
+
+ if (!dev->queue_mgmt_ops || !dev->queue_mgmt_ops->ndo_queue_stop ||
+ !dev->queue_mgmt_ops->ndo_queue_mem_free ||
+ !dev->queue_mgmt_ops->ndo_queue_mem_alloc ||
+ !dev->queue_mgmt_ops->ndo_queue_start)
+ return -EOPNOTSUPP;
+
+ ASSERT_RTNL();
+
+ new_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ if (!new_mem)
+ return -ENOMEM;
+
+ old_mem = kvzalloc(dev->queue_mgmt_ops->ndo_queue_mem_size, GFP_KERNEL);
+ if (!old_mem) {
+ err = -ENOMEM;
+ goto err_free_new_mem;
+ }
+
+ err = dev->queue_mgmt_ops->ndo_queue_mem_alloc(dev, new_mem, rxq_idx);
+ if (err)
+ goto err_free_old_mem;
+
+ err = page_pool_check_memory_provider(dev, rxq);
+ if (err)
+ goto err_free_new_queue_mem;
+
+ err = dev->queue_mgmt_ops->ndo_queue_stop(dev, old_mem, rxq_idx);
+ if (err)
+ goto err_free_new_queue_mem;
+
+ err = dev->queue_mgmt_ops->ndo_queue_start(dev, new_mem, rxq_idx);
+ if (err)
+ goto err_start_queue;
+
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+
+ kvfree(old_mem);
+ kvfree(new_mem);
+
+ return 0;
+
+err_start_queue:
+ /* Restarting the queue with old_mem should be successful as we haven't
+ * changed any of the queue configuration, and there is not much we can
+ * do to recover from a failure here.
+ *
+ * WARN if we fail to recover the old rx queue, and at least free
+ * old_mem so we don't also leak that.
+ */
+ if (dev->queue_mgmt_ops->ndo_queue_start(dev, old_mem, rxq_idx)) {
+ WARN(1,
+ "Failed to restart old queue in error path. RX queue %d may be unhealthy.",
+ rxq_idx);
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, old_mem);
+ }
+
+err_free_new_queue_mem:
+ dev->queue_mgmt_ops->ndo_queue_mem_free(dev, new_mem);
+
+err_free_old_mem:
+ kvfree(old_mem);
+
+err_free_new_mem:
+ kvfree(new_mem);
+
+ return err;
+}
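
netdev_rx_queue_restart() is entirely driven by the driver's queue_mgmt_ops; the callback names and signatures below are taken from the calls above, while everything prefixed mydrv_ is a hypothetical driver-side sketch:

/* Hypothetical driver wiring for the restart path above. */
struct mydrv_rxq_mem {
	/* descriptor ring, page pool pointer, buffers, ... */
};

static int mydrv_rxq_mem_alloc(struct net_device *dev, void *qmem, int idx);
static void mydrv_rxq_mem_free(struct net_device *dev, void *qmem);
static int mydrv_rxq_start(struct net_device *dev, void *qmem, int idx);
static int mydrv_rxq_stop(struct net_device *dev, void *qmem, int idx);

static const struct netdev_queue_mgmt_ops mydrv_queue_mgmt_ops = {
	/* restart allocates this much scratch per queue (new + old) */
	.ndo_queue_mem_size	= sizeof(struct mydrv_rxq_mem),
	.ndo_queue_mem_alloc	= mydrv_rxq_mem_alloc,
	.ndo_queue_mem_free	= mydrv_rxq_mem_free,
	.ndo_queue_start	= mydrv_rxq_start,
	.ndo_queue_stop		= mydrv_rxq_stop,
};

/* at probe time: dev->queue_mgmt_ops = &mydrv_queue_mgmt_ops; */
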
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
new file mode 100644
index 000000000000..7eadb8393e00
--- /dev/null
+++ b/net/core/netmem_priv.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NETMEM_PRIV_H
+#define __NETMEM_PRIV_H
+
+static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->pp_magic;
+}
+
+static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
+{
+ __netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
+}
+
+static inline void netmem_clear_pp_magic(netmem_ref netmem)
+{
+ __netmem_clear_lsb(netmem)->pp_magic = 0;
+}
+
+static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
+{
+ __netmem_clear_lsb(netmem)->pp = pool;
+}
+
+static inline void netmem_set_dma_addr(netmem_ref netmem,
+ unsigned long dma_addr)
+{
+ __netmem_clear_lsb(netmem)->dma_addr = dma_addr;
+}
+#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d657b042d5a0..ca52cbe0f63c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -48,8 +48,6 @@
static struct sk_buff_head skb_pool;
-DEFINE_STATIC_SRCU(netpoll_srcu);
-
#define USEC_PER_POLL 50
#define MAX_SKB_SIZE \
@@ -162,7 +160,7 @@ static void poll_one_napi(struct napi_struct *napi)
if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
return;
- /* We explicilty pass the polling call a budget of 0 to
+ /* We explicitly pass the polling call a budget of 0 to
* indicate that we are clearing the Tx path only.
*/
work = napi->poll(napi, 0);
@@ -220,23 +218,20 @@ EXPORT_SYMBOL(netpoll_poll_dev);
void netpoll_poll_disable(struct net_device *dev)
{
struct netpoll_info *ni;
- int idx;
+
might_sleep();
- idx = srcu_read_lock(&netpoll_srcu);
- ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
+ ni = rtnl_dereference(dev->npinfo);
if (ni)
down(&ni->dev_lock);
- srcu_read_unlock(&netpoll_srcu, idx);
}
void netpoll_poll_enable(struct net_device *dev)
{
struct netpoll_info *ni;
- rcu_read_lock();
- ni = rcu_dereference(dev->npinfo);
+
+ ni = rtnl_dereference(dev->npinfo);
if (ni)
up(&ni->dev_lock);
- rcu_read_unlock();
}
static void refill_skbs(void)
@@ -624,12 +619,9 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
const struct net_device_ops *ops;
int err;
- np->dev = ndev;
- strscpy(np->dev_name, ndev->name, IFNAMSIZ);
-
if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
np_err(np, "%s doesn't support polling, aborting\n",
- np->dev_name);
+ ndev->name);
err = -ENOTSUPP;
goto out;
}
@@ -647,7 +639,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
refcount_set(&npinfo->refcnt, 1);
- ops = np->dev->netdev_ops;
+ ops = ndev->netdev_ops;
if (ops->ndo_netpoll_setup) {
err = ops->ndo_netpoll_setup(ndev, npinfo);
if (err)
@@ -658,6 +650,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
refcount_inc(&npinfo->refcnt);
}
+ np->dev = ndev;
+ strscpy(np->dev_name, ndev->name, IFNAMSIZ);
npinfo->netpoll = np;
/* last thing to do is link it to the net device structure */
@@ -675,6 +669,7 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
int netpoll_setup(struct netpoll *np)
{
struct net_device *ndev = NULL;
+ bool ip_overwritten = false;
struct in_device *in_dev;
int err;
@@ -739,6 +734,7 @@ put_noaddr:
}
np->local_ip.ip = ifa->ifa_local;
+ ip_overwritten = true;
np_info(np, "local IP %pI4\n", &np->local_ip.ip);
} else {
#if IS_ENABLED(CONFIG_IPV6)
@@ -755,6 +751,7 @@ put_noaddr:
!!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
continue;
np->local_ip.in6 = ifp->addr;
+ ip_overwritten = true;
err = 0;
break;
}
@@ -785,6 +782,9 @@ put_noaddr:
return 0;
put:
+ DEBUG_NET_WARN_ON_ONCE(np->dev);
+ if (ip_overwritten)
+ memset(&np->local_ip, 0, sizeof(np->local_ip));
netdev_put(ndev, &np->dev_tracker);
unlock:
rtnl_unlock();
@@ -824,8 +824,6 @@ void __netpoll_cleanup(struct netpoll *np)
if (!npinfo)
return;
- synchronize_srcu(&netpoll_srcu);
-
if (refcount_dec_and_test(&npinfo->refcnt)) {
const struct net_device_ops *ops;
@@ -851,14 +849,20 @@ void __netpoll_free(struct netpoll *np)
}
EXPORT_SYMBOL_GPL(__netpoll_free);
+void do_netpoll_cleanup(struct netpoll *np)
+{
+ __netpoll_cleanup(np);
+ netdev_put(np->dev, &np->dev_tracker);
+ np->dev = NULL;
+}
+EXPORT_SYMBOL(do_netpoll_cleanup);
+
void netpoll_cleanup(struct netpoll *np)
{
rtnl_lock();
if (!np->dev)
goto out;
- __netpoll_cleanup(np);
- netdev_put(np->dev, &np->dev_tracker);
- np->dev = NULL;
+ do_netpoll_cleanup(np);
out:
rtnl_unlock();
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 2abe6e919224..a813d30d2135 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/device.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/xdp.h>
@@ -24,8 +25,12 @@
#include <trace/events/page_pool.h>
+#include "mp_dmabuf_devmem.h"
+#include "netmem_priv.h"
#include "page_pool_priv.h"
+DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
@@ -187,6 +192,8 @@ static int page_pool_init(struct page_pool *pool,
int cpuid)
{
unsigned int ring_qsize = 1024; /* Default */
+ struct netdev_rx_queue *rxq;
+ int err;
page_pool_struct_check();
@@ -268,7 +275,37 @@ static int page_pool_init(struct page_pool *pool,
if (pool->dma_map)
get_device(pool->p.dev);
+ if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
+ /* We rely on rtnl_lock()ing to make sure netdev_rx_queue
+ * configuration doesn't change while we're initializing
+ * the page_pool.
+ */
+ ASSERT_RTNL();
+ rxq = __netif_get_rx_queue(pool->slow.netdev,
+ pool->slow.queue_idx);
+ pool->mp_priv = rxq->mp_params.mp_priv;
+ }
+
+ if (pool->mp_priv) {
+ err = mp_dmabuf_devmem_init(pool);
+ if (err) {
+ pr_warn("%s() mem-provider init failed %d\n", __func__,
+ err);
+ goto free_ptr_ring;
+ }
+
+ static_branch_inc(&page_pool_mem_providers);
+ }
+
return 0;
+
+free_ptr_ring:
+ ptr_ring_cleanup(&pool->ring, NULL);
+#ifdef CONFIG_PAGE_POOL_STATS
+ if (!pool->system)
+ free_percpu(pool->recycle_stats);
+#endif
+ return err;
}
static void page_pool_uninit(struct page_pool *pool)
@@ -358,7 +395,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
if (unlikely(!netmem))
break;
- if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
+ if (likely(netmem_is_pref_nid(netmem, pref_nid))) {
pool->alloc.cache[pool->alloc.count++] = netmem;
} else {
/* NUMA mismatch;
@@ -452,32 +489,6 @@ unmap_failed:
return false;
}
-static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
-{
- struct page *page = netmem_to_page(netmem);
-
- page->pp = pool;
- page->pp_magic |= PP_SIGNATURE;
-
- /* Ensuring all pages have been split into one fragment initially:
- * page_pool_set_pp_info() is only called once for every page when it
- * is allocated from the page allocator and page_pool_fragment_page()
- * is dirtying the same cache line as the page->pp_magic above, so
- * the overhead is negligible.
- */
- page_pool_fragment_netmem(netmem, 1);
- if (pool->has_init_callback)
- pool->slow.init_callback(netmem, pool->slow.init_arg);
-}
-
-static void page_pool_clear_pp_info(netmem_ref netmem)
-{
- struct page *page = netmem_to_page(netmem);
-
- page->pp_magic = 0;
- page->pp = NULL;
-}
-
static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
gfp_t gfp)
{
@@ -573,7 +584,10 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
return netmem;
/* Slow-path: cache empty, do real allocation */
- netmem = __page_pool_alloc_pages_slow(pool, gfp);
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
+ netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);
+ else
+ netmem = __page_pool_alloc_pages_slow(pool, gfp);
return netmem;
}
EXPORT_SYMBOL(page_pool_alloc_netmem);
@@ -609,6 +623,28 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
return inflight;
}
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
+{
+ netmem_set_pp(netmem, pool);
+ netmem_or_pp_magic(netmem, PP_SIGNATURE);
+
+ /* Ensuring all pages have been split into one fragment initially:
+ * page_pool_set_pp_info() is only called once for every page when it
+ * is allocated from the page allocator and page_pool_fragment_page()
+ * is dirtying the same cache line as the page->pp_magic above, so
+ * the overhead is negligible.
+ */
+ page_pool_fragment_netmem(netmem, 1);
+ if (pool->has_init_callback)
+ pool->slow.init_callback(netmem, pool->slow.init_arg);
+}
+
+void page_pool_clear_pp_info(netmem_ref netmem)
+{
+ netmem_clear_pp_magic(netmem);
+ netmem_set_pp(netmem, NULL);
+}
+
static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
netmem_ref netmem)
{
@@ -637,8 +673,13 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
{
int count;
+ bool put;
- __page_pool_release_page_dma(pool, netmem);
+ put = true;
+ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv)
+ put = mp_dmabuf_devmem_release_page(pool, netmem);
+ else
+ __page_pool_release_page_dma(pool, netmem);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
@@ -646,8 +687,10 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, netmem, count);
- page_pool_clear_pp_info(netmem);
- put_page(netmem_to_page(netmem));
+ if (put) {
+ page_pool_clear_pp_info(netmem);
+ put_page(netmem_to_page(netmem));
+ }
/* An optimization would be to call __free_pages(page, pool->p.order)
* knowing page is not part of page-cache (thus avoiding a
* __page_cache_release() call).
@@ -692,8 +735,9 @@ static bool page_pool_recycle_in_cache(netmem_ref netmem,
static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
{
- return page_ref_count(netmem_to_page(netmem)) == 1 &&
- !page_is_pfmemalloc(netmem_to_page(netmem));
+ return netmem_is_net_iov(netmem) ||
+ (page_ref_count(netmem_to_page(netmem)) == 1 &&
+ !page_is_pfmemalloc(netmem_to_page(netmem)));
}
/* If the page refcnt == 1, this will try to recycle the page.
@@ -728,6 +772,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
/* Page found as candidate for recycling */
return netmem;
}
+
/* Fallback/non-XDP mode: API user have elevated refcnt.
*
* Many drivers split up the page into fragments, and some
@@ -949,7 +994,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
/* Empty recycle ring */
while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
/* Verify the refcnt invariant of cached pages */
- if (!(page_ref_count(netmem_to_page(netmem)) == 1))
+ if (!(netmem_ref_count(netmem) == 1))
pr_crit("%s() page_pool refcnt %d violation\n",
__func__, netmem_ref_count(netmem));
@@ -964,6 +1009,12 @@ static void __page_pool_destroy(struct page_pool *pool)
page_pool_unlist(pool);
page_pool_uninit(pool);
+
+ if (pool->mp_priv) {
+ mp_dmabuf_devmem_destroy(pool);
+ static_branch_dec(&page_pool_mem_providers);
+ }
+
kfree(pool);
}
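
On the driver side, the new init branch is reached by creating the pool with PP_FLAG_ALLOW_UNREADABLE_NETMEM and pointing it at the rx queue it serves. A sketch, assuming the .netdev/.queue_idx parameter names that back pool->slow.netdev/pool->slow.queue_idx above, and that the caller holds rtnl_lock() as page_pool_init() asserts:

/* Illustrative rx pool creation for a devmem-capable queue. */
static struct page_pool *mydrv_create_rx_pool(struct net_device *dev,
					      int rxq_idx)
{
	struct page_pool_params pp = {
		.order		= 0,
		.pool_size	= 1024,
		.nid		= NUMA_NO_NODE,
		.dev		= dev->dev.parent,	/* for DMA mapping */
		.netdev		= dev,
		.queue_idx	= rxq_idx,
		.dma_dir	= DMA_FROM_DEVICE,
		.flags		= PP_FLAG_DMA_MAP |
				  PP_FLAG_ALLOW_UNREADABLE_NETMEM,
	};

	return page_pool_create(&pp);	/* pool->mp_priv set from the rxq */
}
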
diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h
index 90665d40f1eb..57439787b9c2 100644
--- a/net/core/page_pool_priv.h
+++ b/net/core/page_pool_priv.h
@@ -3,10 +3,56 @@
#ifndef __PAGE_POOL_PRIV_H
#define __PAGE_POOL_PRIV_H
+#include <net/page_pool/helpers.h>
+
+#include "netmem_priv.h"
+
s32 page_pool_inflight(const struct page_pool *pool, bool strict);
int page_pool_list(struct page_pool *pool);
void page_pool_detached(struct page_pool *pool);
void page_pool_unlist(struct page_pool *pool);
+static inline bool
+page_pool_set_dma_addr_netmem(netmem_ref netmem, dma_addr_t addr)
+{
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
+ netmem_set_dma_addr(netmem, addr >> PAGE_SHIFT);
+
+ /* We assume page alignment to shave off bottom bits,
+ * if this "compression" doesn't work we need to drop.
+ */
+ return addr != (dma_addr_t)netmem_get_dma_addr(netmem)
+ << PAGE_SHIFT;
+ }
+
+ netmem_set_dma_addr(netmem, addr);
+ return false;
+}
+
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
+}
+
+#if defined(CONFIG_PAGE_POOL)
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem);
+void page_pool_clear_pp_info(netmem_ref netmem);
+int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq);
+#else
+static inline void page_pool_set_pp_info(struct page_pool *pool,
+ netmem_ref netmem)
+{
+}
+static inline void page_pool_clear_pp_info(netmem_ref netmem)
+{
+}
+static inline int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq)
+{
+ return 0;
+}
+#endif
+
#endif
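
The 32-bit-with-64-bit-DMA branch above compresses a DMA address by storing it right-shifted by PAGE_SHIFT and reports failure when the round trip is lossy. A worked sketch of just that check, with illustrative values:

/* With PAGE_SHIFT == 12: addr 0x123456000 is stored as 0x123456 in the
 * 32-bit dma_addr field and restores losslessly. An addr at or above
 * 1ULL << (32 + PAGE_SHIFT), or one with low (sub-page) bits set, does
 * not survive the round trip, so the setter above returns true ("drop").
 */
static bool dma_addr_roundtrips(dma_addr_t addr)
{
	unsigned long stored = addr >> PAGE_SHIFT;	/* 32-bit slot */

	return addr == ((dma_addr_t)stored << PAGE_SHIFT);
}
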
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
index 3a3277ba167b..48335766c1bf 100644
--- a/net/core/page_pool_user.c
+++ b/net/core/page_pool_user.c
@@ -4,10 +4,12 @@
#include <linux/netdevice.h>
#include <linux/xarray.h>
#include <net/net_debug.h>
-#include <net/page_pool/types.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
+#include <net/page_pool/types.h>
#include <net/sock.h>
+#include "devmem.h"
#include "page_pool_priv.h"
#include "netdev-genl-gen.h"
@@ -212,6 +214,7 @@ static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
const struct genl_info *info)
{
+ struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
size_t inflight, refsz;
void *hdr;
@@ -241,6 +244,9 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
pool->user.detach_time))
goto err_cancel;
+ if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
+ goto err_cancel;
+
genlmsg_end(rsp, hdr);
return 0;
@@ -344,6 +350,30 @@ void page_pool_unlist(struct page_pool *pool)
mutex_unlock(&page_pools_lock);
}
+int page_pool_check_memory_provider(struct net_device *dev,
+ struct netdev_rx_queue *rxq)
+{
+ struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
+ struct page_pool *pool;
+ struct hlist_node *n;
+
+ if (!binding)
+ return 0;
+
+ mutex_lock(&page_pools_lock);
+ hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) {
+ if (pool->mp_priv != binding)
+ continue;
+
+ if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) {
+ mutex_unlock(&page_pools_lock);
+ return 0;
+ }
+ }
+ mutex_unlock(&page_pools_lock);
+ return -ENODATA;
+}
+
static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
{
struct page_pool *pool;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 197a50ef8e2e..34f68ef74b8f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -69,7 +69,7 @@
*
* By design there should only be *one* "controlling" process. In practice
* multiple write accesses gives unpredictable result. Understood by "write"
- * to /proc gives result code thats should be read be the "writer".
+ * to /proc gives result code that should be read by the "writer".
* For practical use this should be no problem.
*
* Note when adding devices to a specific CPU there good idea to also assign
@@ -2371,11 +2371,11 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
if (pkt_dev->spi) {
/* We need as quick as possible to find the right SA
- * Searching with minimum criteria to archieve this.
+ * Searching with minimum criteria to achieve this.
*/
x = xfrm_state_lookup_byspi(pn->net, htonl(pkt_dev->spi), AF_INET);
} else {
- /* slow path: we dont already have xfrm_state */
+ /* slow path: we don't already have xfrm_state */
x = xfrm_stateonly_find(pn->net, DUMMY_MARK, 0,
(xfrm_address_t *)&pkt_dev->cur_daddr,
(xfrm_address_t *)&pkt_dev->cur_saddr,
@@ -3838,8 +3838,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
pkt_dev->ipsmode = XFRM_MODE_TRANSPORT;
pkt_dev->ipsproto = IPPROTO_ESP;
- /* xfrm tunnel mode needs additional dst to extract outter
- * ip header protocol/ttl/id field, here creat a phony one.
+ /* xfrm tunnel mode needs additional dst to extract outer
+ * ip header protocol/ttl/id field, here create a phony one.
* instead of looking for a valid rt, which definitely hurting
* performance under such circumstance.
*/
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 73fd7f543fd0..f0a520987085 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2724,7 +2724,7 @@ static int do_set_proto_down(struct net_device *dev,
bool proto_down;
int err;
- if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN)) {
+ if (!dev->change_proto_down) {
NL_SET_ERR_MSG(extack, "Protodown not supported by device");
return -EOPNOTSUPP;
}
@@ -4087,8 +4087,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
}
return skb;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
return NULL;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 83f8cd8aa2d1..74149dc4ee31 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -88,6 +88,7 @@
#include <linux/textsearch.h>
#include "dev.h"
+#include "netmem_priv.h"
#include "sock_destructor.h"
#ifdef CONFIG_SKB_EXTENSIONS
@@ -314,8 +315,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
fragsz = SKB_DATA_ALIGN(fragsz);
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
- data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC,
- align_mask);
+ data = __page_frag_alloc_align(&nc->page, fragsz,
+ GFP_ATOMIC | __GFP_NOWARN, align_mask);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
return data;
@@ -330,7 +331,8 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
fragsz = SKB_DATA_ALIGN(fragsz);
- data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC,
+ data = __page_frag_alloc_align(nc, fragsz,
+ GFP_ATOMIC | __GFP_NOWARN,
align_mask);
} else {
local_bh_disable();
@@ -349,7 +351,7 @@ static struct sk_buff *napi_skb_cache_get(void)
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!nc->skb_count)) {
nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
- GFP_ATOMIC,
+ GFP_ATOMIC | __GFP_NOWARN,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
if (unlikely(!nc->skb_count)) {
@@ -418,7 +420,8 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
@@ -469,7 +472,8 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(net_hotdata.skbuff_cache,
+ GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
@@ -917,9 +921,9 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-static bool is_pp_page(struct page *page)
+static bool is_pp_netmem(netmem_ref netmem)
{
- return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+ return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
}
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
@@ -1017,9 +1021,7 @@ EXPORT_SYMBOL(skb_cow_data_for_xdp);
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(netmem_ref netmem)
{
- struct page *page = netmem_to_page(netmem);
-
- page = compound_head(page);
+ netmem = netmem_compound_head(netmem);
/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
* in order to preserve any existing bits, such as bit 0 for the
@@ -1028,10 +1030,10 @@ bool napi_pp_put_page(netmem_ref netmem)
* and page_is_pfmemalloc() is checked in __page_pool_put_page()
* to avoid recycling the pfmemalloc page.
*/
- if (unlikely(!is_pp_page(page)))
+ if (unlikely(!is_pp_netmem(netmem)))
return false;
- page_pool_put_full_netmem(page->pp, page_to_netmem(page), false);
+ page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
return true;
}
@@ -1058,7 +1060,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
static int skb_pp_frag_ref(struct sk_buff *skb)
{
struct skb_shared_info *shinfo;
- struct page *head_page;
+ netmem_ref head_netmem;
int i;
if (!skb->pp_recycle)
@@ -1067,11 +1069,11 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
shinfo = skb_shinfo(skb);
for (i = 0; i < shinfo->nr_frags; i++) {
- head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
- if (likely(is_pp_page(head_page)))
- page_pool_ref_page(head_page);
+ head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
+ if (likely(is_pp_netmem(head_netmem)))
+ page_pool_ref_netmem(head_netmem);
else
- page_ref_inc(head_page);
+ page_ref_inc(netmem_to_page(head_netmem));
}
return 0;
}
@@ -1369,6 +1371,14 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
struct page *p;
u8 *vaddr;
+ if (skb_frag_is_net_iov(frag)) {
+ printk("%sskb frag %d: not readable\n", level, i);
+ len -= skb_frag_size(frag);
+ if (!len)
+ break;
+ continue;
+ }
+
skb_frag_foreach_page(frag, skb_frag_off(frag),
skb_frag_size(frag), p, p_off, p_len,
copied) {
@@ -1962,6 +1972,9 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
return -EINVAL;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
if (!num_frags)
goto release;
@@ -2135,6 +2148,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
unsigned int size;
int headerlen;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
return NULL;
@@ -2473,6 +2489,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
struct sk_buff *n;
int oldheadroom;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
return NULL;
@@ -2817,6 +2836,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
*/
int i, k, eat = (skb->tail + delta) - skb->end;
+ if (!skb_frags_readable(skb))
+ return NULL;
+
if (eat > 0 || skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
GFP_ATOMIC))
@@ -2970,6 +2992,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
to += copy;
}
+ if (!skb_frags_readable(skb))
+ goto fault;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
skb_frag_t *f = &skb_shinfo(skb)->frags[i];
@@ -3158,9 +3183,15 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
/*
* then map the fragments
*/
+ if (!skb_frags_readable(skb))
+ return false;
+
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+ if (WARN_ON_ONCE(!skb_frag_page(f)))
+ return false;
+
if (__splice_segment(skb_frag_page(f),
skb_frag_off(f), skb_frag_size(f),
offset, len, spd, false, sk, pipe))
@@ -3378,6 +3409,9 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
from += copy;
}
+ if (!skb_frags_readable(skb))
+ goto fault;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
int end;
@@ -3457,6 +3491,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
pos = copy;
}
+ if (WARN_ON_ONCE(!skb_frags_readable(skb)))
+ return 0;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -3557,6 +3594,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
pos = copy;
}
+ if (!skb_frags_readable(skb))
+ return 0;
+
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
int end;
@@ -4048,6 +4088,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
+ skb1->unreadable = skb->unreadable;
skb_shinfo(skb)->nr_frags = 0;
skb1->data_len = skb->data_len;
skb1->len += skb1->data_len;
@@ -4095,6 +4136,8 @@ static inline void skb_split_no_header(struct sk_buff *skb,
pos += size;
}
skb_shinfo(skb1)->nr_frags = k;
+
+ skb1->unreadable = skb->unreadable;
}
/**
@@ -4169,8 +4212,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
/* Actual merge is delayed until the point when we know we can
* commit all, so that we don't have to undo partial changes
*/
- if (!to ||
- !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
+ if (!skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
skb_frag_off(fragfrom))) {
merge = -1;
} else {
@@ -4333,6 +4375,9 @@ next_skb:
return block_limit - abs_offset;
}
+ if (!skb_frags_readable(st->cur_skb))
+ return 0;
+
if (st->frag_idx == 0 && !st->frag_data)
st->stepped_offset += skb_headlen(st->cur_skb);
@@ -4409,6 +4454,41 @@ void skb_abort_seq_read(struct skb_seq_state *st)
}
EXPORT_SYMBOL(skb_abort_seq_read);
+/**
+ * skb_copy_seq_read() - copy from a skb_seq_state to a buffer
+ * @st: source skb_seq_state
+ * @offset: offset in source
+ * @to: destination buffer
+ * @len: number of bytes to copy
+ *
+ * Copy @len bytes from @offset bytes into the source @st to the destination
+ * buffer @to. `offset` should increase (or be unchanged) with each subsequent
+ * call to this function. If offset needs to decrease from the previous use,
+ * `st` should be reset first.
+ *
+ * Return: 0 on success or -EINVAL if the copy ended early
+ */
+int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len)
+{
+ const u8 *data;
+ u32 sqlen;
+
+ for (;;) {
+ sqlen = skb_seq_read(offset, &data, st);
+ if (sqlen == 0)
+ return -EINVAL;
+ if (sqlen >= len) {
+ memcpy(to, data, len);
+ return 0;
+ }
+ memcpy(to, data, sqlen);
+ to += sqlen;
+ offset += sqlen;
+ len -= sqlen;
+ }
+}
+EXPORT_SYMBOL(skb_copy_seq_read);
+
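
A minimal caller sketch for the new helper, pairing it with the existing skb_prepare_seq_read()/skb_abort_seq_read() API; the example_ name and the whole-skb window are illustrative:

/* Copy len bytes at 'offset' out of an skb, even when they straddle
 * fragments. Sketch only; per-call error handling kept minimal.
 */
static int example_peek_bytes(struct sk_buff *skb, int offset,
			      void *to, int len)
{
	struct skb_seq_state st;
	int err;

	skb_prepare_seq_read(skb, 0, skb->len, &st);
	err = skb_copy_seq_read(&st, offset, to, len);
	skb_abort_seq_read(&st);

	return err;	/* 0, or -EINVAL if the copy ended early */
}
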
#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text,
@@ -5161,7 +5241,7 @@ EXPORT_SYMBOL_GPL(skb_to_sgvec);
* 3. sg_unmark_end
* 4. skb_to_sgvec(payload2)
*
- * When mapping mutilple payload conditionally, skb_to_sgvec_nomark
+ * When mapping multiple payload conditionally, skb_to_sgvec_nomark
* is more preferable.
*/
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
@@ -5945,7 +6025,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (to->pp_recycle != from->pp_recycle)
return false;
- if (len <= skb_tailroom(to)) {
+ if (skb_frags_readable(from) != skb_frags_readable(to))
+ return false;
+
+ if (len <= skb_tailroom(to) && skb_frags_readable(from)) {
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
*delta_truesize = 0;
@@ -6019,7 +6102,7 @@ EXPORT_SYMBOL(skb_try_coalesce);
* @skb: buffer to clean
* @xnet: packet is crossing netns
*
- * skb_scrub_packet can be used after encapsulating or decapsulting a packet
+ * skb_scrub_packet can be used after encapsulating or decapsulating a packet
* into/from a tunnel. Some information have to be cleared during these
* operations.
* skb_scrub_packet can also be used to clean a skb before injecting it in
@@ -6122,6 +6205,9 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
if (!pskb_may_pull(skb, write_len))
return -ENOMEM;
+ if (!skb_frags_readable(skb))
+ return -EFAULT;
+
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;
@@ -6241,7 +6327,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
return err;
skb->protocol = skb->vlan_proto;
- skb->mac_len += VLAN_HLEN;
+ skb->network_header -= VLAN_HLEN;
skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
}
@@ -6801,7 +6887,7 @@ void skb_condense(struct sk_buff *skb)
{
if (skb->data_len) {
if (skb->data_len > skb->end - skb->tail ||
- skb_cloned(skb))
+ skb_cloned(skb) || !skb_frags_readable(skb))
return;
/* Nice, we can free page frag(s) right now */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index bbf40b999713..b1dcbd3be89e 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -293,7 +293,7 @@ out:
/* If we trim data a full sg elem before curr pointer update
* copybreak and current so that any future copy operations
* start at new copy location.
- * However trimed data that has not yet been used in a copy op
+ * However trimmed data that has not yet been used in a copy op
* does not require an update.
*/
if (!msg->sg.size) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 9abc4fe25953..bbb57b5af0b1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -124,6 +124,7 @@
#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
+#include <linux/skbuff_ref.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
@@ -1049,6 +1050,69 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
return 0;
}
+#ifdef CONFIG_PAGE_POOL
+
+/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
+ * one syscall. The limit exists to bound the amount of memory the kernel
+ * allocates to copy these tokens.
+ */
+#define MAX_DONTNEED_TOKENS 128
+
+static noinline_for_stack int
+sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
+{
+ unsigned int num_tokens, i, j, k, netmem_num = 0;
+ struct dmabuf_token *tokens;
+ netmem_ref netmems[16];
+ int ret = 0;
+
+ if (!sk_is_tcp(sk))
+ return -EBADF;
+
+ if (optlen % sizeof(struct dmabuf_token) ||
+ optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
+ return -EINVAL;
+
+ tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
+ if (!tokens)
+ return -ENOMEM;
+
+ num_tokens = optlen / sizeof(struct dmabuf_token);
+ if (copy_from_sockptr(tokens, optval, optlen)) {
+ kvfree(tokens);
+ return -EFAULT;
+ }
+
+ xa_lock_bh(&sk->sk_user_frags);
+ for (i = 0; i < num_tokens; i++) {
+ for (j = 0; j < tokens[i].token_count; j++) {
+ netmem_ref netmem = (__force netmem_ref)__xa_erase(
+ &sk->sk_user_frags, tokens[i].token_start + j);
+
+ if (netmem &&
+ !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) {
+ netmems[netmem_num++] = netmem;
+ if (netmem_num == ARRAY_SIZE(netmems)) {
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+ netmem_num = 0;
+ xa_lock_bh(&sk->sk_user_frags);
+ }
+ ret++;
+ }
+ }
+ }
+
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+
+ kvfree(tokens);
+ return ret;
+}
+#endif
+
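A minimal userspace sketch of the new option, assuming the uapi names from this series (struct dmabuf_token exported via <linux/uio.h>, option handled at SOL_SOCKET level); per the loop above, the return value is the number of frags actually released:

	struct dmabuf_token tok = { .token_start = 128, .token_count = 16 };
	int freed;

	/* Hand tokens 128..143 from a devmem TCP receive back to the
	 * kernel; freed < 0 on error, otherwise the count of frags
	 * returned to the page pool.
	 */
	freed = setsockopt(fd, SOL_SOCKET, SO_DEVMEM_DONTNEED,
			   &tok, sizeof(tok));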
void sockopt_lock_sock(struct sock *sk)
{
/* When current->bpf_ctx is set, the setsockopt is called from
@@ -1211,6 +1275,10 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
ret = -EOPNOTSUPP;
return ret;
}
+#ifdef CONFIG_PAGE_POOL
+ case SO_DEVMEM_DONTNEED:
+ return sock_devmem_dontneed(sk, optval, optlen);
+#endif
}
sockopt_lock_sock(sk);
@@ -2048,7 +2116,7 @@ static inline void sock_lock_init(struct sock *sk)
/*
* Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
- * even temporarly, because of RCU lookups. sk_node should also be left as is.
+ * even temporarily, because of RCU lookups. sk_node should also be left as is.
* We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
@@ -2538,7 +2606,7 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
skb_set_hash_from_sk(skb, sk);
/*
* We used to take a refcount on sk, but following operation
- * is enough to guarantee sk_free() wont free this sock until
+ * is enough to guarantee sk_free() won't free this sock until
* all in-flight packets are completed
*/
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
@@ -3697,7 +3765,7 @@ EXPORT_SYMBOL(sock_recv_errqueue);
*
* FIX: POSIX 1003.1g is very ambiguous here. It states that
* asynchronous errors should be reported by getsockopt. We assume
- * this means if you specify SO_ERROR (otherwise whats the point of it).
+ * this means if you specify SO_ERROR (otherwise what is the point of it).
*/
int sock_common_getsockopt(struct socket *sock, int level, int optname,
char __user *optval, int __user *optlen)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d3dbb92153f2..724b6856fcc3 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1183,6 +1183,7 @@ static void sock_hash_free(struct bpf_map *map)
sock_put(elem->sk);
sock_hash_free_elem(htab, elem);
}
+ cond_resched();
}
/* wait for psock readers accessing its map link */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 5a165286e4d8..4211710393a8 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -173,10 +173,9 @@ static bool __reuseport_detach_closed_sock(struct sock *sk,
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
- unsigned int size = sizeof(struct sock_reuseport) +
- sizeof(struct sock *) * max_socks;
- struct sock_reuseport *reuse = kzalloc(size, GFP_ATOMIC);
+ struct sock_reuseport *reuse;
+ reuse = kzalloc(struct_size(reuse, socks, max_socks), GFP_ATOMIC);
if (!reuse)
return NULL;
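The struct_size() conversion above is equivalent to the removed open-coded arithmetic for the flexible socks[] array, with the added benefit of saturating instead of silently wrapping on overflow; roughly:

	/* struct_size(reuse, socks, max_socks) computes, overflow-checked:
	 *   sizeof(struct sock_reuseport) + max_socks * sizeof(struct sock *)
	 */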
diff --git a/net/core/utils.c b/net/core/utils.c
index c994e95172ac..27f4cffaae05 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Generic address resultion entity
+ * Generic address resolution entity
*
* Authors:
* net_random Alan Cox
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index ee7b272ab715..281bbac5539d 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -111,9 +111,10 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
* DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
* ---------------------------------------------------------------------------
* tag0 : zero-based value represents port
- * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ * (eg, 0x0=port1, 0x2=port3, 0x3=port4)
*/
+#define KSZ8795_TAIL_TAG_EG_PORT_M GENMASK(1, 0)
#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6)
#define KSZ8795_TAIL_TAG_LOOKUP BIT(7)
@@ -141,7 +142,8 @@ static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN);
+ return ksz_common_rcv(skb, dev, tag[0] & KSZ8795_TAIL_TAG_EG_PORT_M,
+ KSZ_EGRESS_TAG_LEN);
}
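As a worked example of the decode above (hypothetical tag byte): a trailing tag0 of 0x02 masked with KSZ8795_TAIL_TAG_EG_PORT_M (GENMASK(1, 0)) yields egress port index 2, i.e. port3; the old open-coded mask of 7 accepted a third bit that the two-port-bit KSZ8795 egress format never produces.

	u8 tag0 = 0x02;						/* hypothetical */
	unsigned int port = tag0 & KSZ8795_TAIL_TAG_EG_PORT_M;	/* == 2 */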
static const struct dsa_device_ops ksz8795_netdev_ops = {
@@ -176,8 +178,9 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME);
#define KSZ9477_INGRESS_TAG_LEN 2
#define KSZ9477_PTP_TAG_LEN 4
-#define KSZ9477_PTP_TAG_INDICATION 0x80
+#define KSZ9477_PTP_TAG_INDICATION BIT(7)
+#define KSZ9477_TAIL_TAG_EG_PORT_M GENMASK(2, 0)
#define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7)
#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
@@ -310,7 +313,7 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
- unsigned int port = tag[0] & 7;
+ unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M;
unsigned int len = KSZ_EGRESS_TAG_LEN;
/* Extra 4-bytes PTP timestamp */
diff --git a/net/dsa/user.c b/net/dsa/user.c
index f5adfa1d978a..74eda9b30608 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -2642,11 +2642,12 @@ void dsa_user_setup_tagger(struct net_device *user)
user->features = conduit->vlan_features | NETIF_F_HW_TC;
user->hw_features |= NETIF_F_HW_TC;
- user->features |= NETIF_F_LLTX;
if (user->needed_tailroom)
user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
if (ds->needs_standalone_vlan_filtering)
user->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ user->lltx = true;
}
int dsa_user_suspend(struct net_device *user_dev)
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 9a190635fe95..9b540644ba31 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -8,4 +8,5 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \
- module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o
+ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o \
+ phy.o
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index f6f136ec7ddf..f22051f33868 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -13,7 +13,7 @@
const struct nla_policy ethnl_cable_test_act_policy[] = {
[ETHTOOL_A_CABLE_TEST_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static int ethnl_cable_test_started(struct phy_device *phydev, u8 cmd)
@@ -58,6 +58,7 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
struct ethnl_req_info req_info = {};
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
struct net_device *dev;
int ret;
@@ -69,12 +70,16 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
return ret;
dev = req_info.dev;
- if (!dev->phydev) {
+
+ rtnl_lock();
+ phydev = ethnl_req_get_phydev(&req_info,
+ tb[ETHTOOL_A_CABLE_TEST_HEADER],
+ info->extack);
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
- goto out_dev_put;
+ goto out_rtnl;
}
- rtnl_lock();
ops = ethtool_phy_ops;
if (!ops || !ops->start_cable_test) {
ret = -EOPNOTSUPP;
@@ -85,17 +90,15 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto out_rtnl;
- ret = ops->start_cable_test(dev->phydev, info->extack);
+ ret = ops->start_cable_test(phydev, info->extack);
ethnl_ops_complete(dev);
if (!ret)
- ethnl_cable_test_started(dev->phydev,
- ETHTOOL_MSG_CABLE_TEST_NTF);
+ ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_NTF);
out_rtnl:
rtnl_unlock();
-out_dev_put:
ethnl_parse_header_dev_put(&req_info);
return ret;
}
@@ -160,7 +163,8 @@ void ethnl_cable_test_finished(struct phy_device *phydev)
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_finished);
-int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result)
+int ethnl_cable_test_result_with_src(struct phy_device *phydev, u8 pair,
+ u8 result, u32 src)
{
struct nlattr *nest;
int ret = -EMSGSIZE;
@@ -173,6 +177,10 @@ int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result)
goto err;
if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_CODE, result))
goto err;
+ if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_RESULT_SRC, src))
+ goto err;
+ }
nla_nest_end(phydev->skb, nest);
return 0;
@@ -181,9 +189,10 @@ err:
nla_nest_cancel(phydev->skb, nest);
return ret;
}
-EXPORT_SYMBOL_GPL(ethnl_cable_test_result);
+EXPORT_SYMBOL_GPL(ethnl_cable_test_result_with_src);
-int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm)
+int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, u8 pair,
+ u32 cm, u32 src)
{
struct nlattr *nest;
int ret = -EMSGSIZE;
@@ -197,6 +206,11 @@ int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm)
goto err;
if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_CM, cm))
goto err;
+ if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
+ if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,
+ src))
+ goto err;
+ }
nla_nest_end(phydev->skb, nest);
return 0;
@@ -205,7 +219,7 @@ err:
nla_nest_cancel(phydev->skb, nest);
return ret;
}
-EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
+EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length_with_src);
static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST] = { .type = NLA_U32 },
@@ -216,7 +230,7 @@ static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
const struct nla_policy ethnl_cable_test_tdr_act_policy[] = {
[ETHTOOL_A_CABLE_TEST_TDR_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_CABLE_TEST_TDR_CFG] = { .type = NLA_NESTED },
};
@@ -305,6 +319,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
struct ethnl_req_info req_info = {};
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
struct phy_tdr_config cfg;
struct net_device *dev;
int ret;
@@ -317,10 +332,6 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
return ret;
dev = req_info.dev;
- if (!dev->phydev) {
- ret = -EOPNOTSUPP;
- goto out_dev_put;
- }
ret = ethnl_act_cable_test_tdr_cfg(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG],
info, &cfg);
@@ -328,6 +339,14 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
goto out_dev_put;
rtnl_lock();
+ phydev = ethnl_req_get_phydev(&req_info,
+ tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER],
+ info->extack);
+ if (IS_ERR_OR_NULL(phydev)) {
+ ret = -EOPNOTSUPP;
+ goto out_rtnl;
+ }
+
ops = ethtool_phy_ops;
if (!ops || !ops->start_cable_test_tdr) {
ret = -EOPNOTSUPP;
@@ -338,12 +357,12 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto out_rtnl;
- ret = ops->start_cable_test_tdr(dev->phydev, info->extack, &cfg);
+ ret = ops->start_cable_test_tdr(phydev, info->extack, &cfg);
ethnl_ops_complete(dev);
if (!ret)
- ethnl_cable_test_started(dev->phydev,
+ ethnl_cable_test_started(phydev,
ETHTOOL_MSG_CABLE_TEST_TDR_NTF);
out_rtnl:
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index cee188da54f8..ca4f80282448 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -114,8 +114,7 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
struct net_device *dev = req_info->dev;
struct ethtool_channels channels = {};
struct nlattr **tb = info->attrs;
- u32 err_attr, max_rxfh_in_use;
- u64 max_rxnfc_in_use;
+ u32 err_attr;
int ret;
dev->ethtool_ops->get_channels(dev, &channels);
@@ -166,20 +165,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
return -EINVAL;
}
- /* ensure the new Rx count fits within the configured Rx flow
- * indirection table/rxnfc settings
- */
- if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
- max_rxnfc_in_use = 0;
- max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
- if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
- GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
- return -EINVAL;
- }
- if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
- GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
- return -EINVAL;
- }
+ ret = ethtool_check_max_channel(dev, channels, info);
+ if (ret)
+ return ret;
/* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h
index e71cc3e1b7eb..3e7c293af78c 100644
--- a/net/ethtool/cmis.h
+++ b/net/ethtool/cmis.h
@@ -108,7 +108,6 @@ void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags);
void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data,
u8 page, u32 offset, u32 length);
-void ethtool_cmis_page_fini(struct ethtool_module_eeprom *page_data);
struct ethtool_cmis_cdb *
ethtool_cmis_cdb_init(struct net_device *dev,
diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c
index 1bb08783b60d..4d5581147952 100644
--- a/net/ethtool/cmis_cdb.c
+++ b/net/ethtool/cmis_cdb.c
@@ -100,7 +100,8 @@ static u8 cmis_cdb_advert_rpl_inst_supported(struct cmis_cdb_advert_rpl *rpl)
}
static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
- struct net_device *dev)
+ struct net_device *dev,
+ struct ethnl_module_fw_flash_ntf_params *ntf_params)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_module_eeprom page_data = {};
@@ -119,8 +120,12 @@ static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb,
return err;
}
- if (!cmis_cdb_advert_rpl_inst_supported(&rpl))
+ if (!cmis_cdb_advert_rpl_inst_supported(&rpl)) {
+ ethnl_module_fw_flash_ntf_err(dev, ntf_params,
+ "CDB functionality is not supported",
+ NULL);
return -EOPNOTSUPP;
+ }
cdb->read_write_len_ext = rpl.read_write_len_ext;
@@ -282,7 +287,7 @@ ethtool_cmis_cdb_init(struct net_device *dev,
goto err;
}
- err = cmis_cdb_advertisement_get(cdb, dev);
+ err = cmis_cdb_advertisement_get(cdb, dev, ntf_params);
if (err < 0)
goto err;
@@ -444,6 +449,9 @@ static void cmis_cdb_status_fail_msg_get(u8 status, char **err_msg)
case 0b01000101:
*err_msg = "CDB status failed: CdbChkCode error";
break;
+ case 0b01000110:
+ *err_msg = "CDB status failed: Password error";
+ break;
default:
*err_msg = "Unknown failure reason";
}
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 07032babd1b6..dd345efa114b 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -6,6 +6,7 @@
#include <linux/rtnetlink.h>
#include <linux/ptp_clock_kernel.h>
+#include "netlink.h"
#include "common.h"
const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
@@ -24,8 +25,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
[NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
[NETIF_F_GSO_BIT] = "tx-generic-segmentation",
- [NETIF_F_LLTX_BIT] = "tx-lockless",
- [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
[NETIF_F_GRO_BIT] = "rx-gro",
[NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
[NETIF_F_LRO_BIT] = "rx-lro",
@@ -51,7 +50,6 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
- [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
[NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
[NETIF_F_RXHASH_BIT] = "rx-hashing",
[NETIF_F_RXCSUM_BIT] = "rx-checksum",
@@ -429,6 +427,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = {
[const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw",
[const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc",
[const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp",
+ [const_ilog2(SOF_TIMESTAMPING_OPT_RX_FILTER)] = "option-rx-filter",
};
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT);
@@ -539,7 +538,7 @@ static int ethtool_get_rxnfc_rule_count(struct net_device *dev)
return info.rule_cnt;
}
-int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
+static int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max)
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxnfc *info;
@@ -609,7 +608,7 @@ static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev)
return max_ring;
}
-u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
+static u32 ethtool_get_max_rxfh_channel(struct net_device *dev)
{
struct ethtool_rxfh_param rxfh = {};
u32 dev_size, current_max;
@@ -650,10 +649,47 @@ out_free:
return current_max;
}
+int ethtool_check_max_channel(struct net_device *dev,
+ struct ethtool_channels channels,
+ struct genl_info *info)
+{
+ u64 max_rxnfc_in_use;
+ u32 max_rxfh_in_use;
+ int max_mp_in_use;
+
+ /* ensure the new Rx count fits within the configured Rx flow
+ * indirection table/rxnfc settings
+ */
+ if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
+ max_rxnfc_in_use = 0;
+ max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
+ if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing indirection table (%d)", max_rxfh_in_use);
+ return -EINVAL;
+ }
+ if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
+ return -EINVAL;
+ }
+
+ max_mp_in_use = dev_get_min_mp_channel_count(dev);
+ if (channels.combined_count + channels.rx_count <= max_mp_in_use) {
+ if (info)
+ GENL_SET_ERR_MSG_FMT(info, "requested channel counts are too low for existing memory provider setting (%d)", max_mp_in_use);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int ethtool_check_ops(const struct ethtool_ops *ops)
{
if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params))
return -EINVAL;
+ if (WARN_ON(ops->rxfh_max_num_contexts == 1))
+ return -EINVAL;
/* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime,
* the fact that ops are checked at registration time does not
* mean the ops attached to a netdev later on are sane.
@@ -665,20 +701,21 @@ int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info
{
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
+ int err = 0;
memset(info, 0, sizeof(*info));
info->cmd = ETHTOOL_GET_TS_INFO;
+ info->phc_index = -1;
if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev))
- return phy_ts_info(phydev, info);
- if (ops->get_ts_info)
- return ops->get_ts_info(dev, info);
+ err = phy_ts_info(phydev, info);
+ else if (ops->get_ts_info)
+ err = ops->get_ts_info(dev, info);
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
+ info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
- return 0;
+ return err;
}
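The net effect of the reshuffle above: the software RX timestamping flags are now ORed on top of whatever the PHY or driver reported rather than overwriting it, phc_index defaults to -1 before the hooks run, and hook errors propagate to the caller. A sketch with hypothetical driver-reported values:

	/* Driver reports SOF_TIMESTAMPING_TX_HARDWARE and phc_index 3;
	 * the caller now sees:
	 *   so_timestamping == TX_HARDWARE | RX_SOFTWARE | SOFTWARE
	 *   phc_index       == 3
	 * With no PHY/driver hook at all, phc_index stays -1 and only
	 * the software flags are set.
	 */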
int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index 863806fcf01a..d55d5201b085 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -20,6 +20,8 @@ struct link_mode_info {
u8 duplex;
};
+struct genl_info;
+
extern const char
netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
extern const char
@@ -42,8 +44,9 @@ int __ethtool_get_link(struct net_device *dev);
bool convert_legacy_settings_to_link_ksettings(
struct ethtool_link_ksettings *link_ksettings,
const struct ethtool_cmd *legacy_settings);
-u32 ethtool_get_max_rxfh_channel(struct net_device *dev);
-int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max);
+int ethtool_check_max_channel(struct net_device *dev,
+ struct ethtool_channels channels,
+ struct genl_info *info);
int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info);
extern const struct ethtool_phy_ops *ethtool_phy_ops;
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index ae041f51cd2d..65cfe76dafbe 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1230,7 +1230,8 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
+ ops->create_rxfh_context))
return -EOPNOTSUPP;
rxfh.indir_size = rxfh_dev.indir_size;
@@ -1263,10 +1264,15 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
if (rxfh_dev.indir)
memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx),
indir_bytes);
- if (rxfh_dev.key)
- memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx),
- user_key_size);
- rxfh_dev.hfunc = ctx->hfunc;
+ if (!ops->rxfh_per_ctx_key) {
+ rxfh_dev.key_size = 0;
+ } else {
+ if (rxfh_dev.key)
+ memcpy(rxfh_dev.key,
+ ethtool_rxfh_context_key(ctx),
+ user_key_size);
+ rxfh_dev.hfunc = ctx->hfunc;
+ }
rxfh_dev.input_xfrm = ctx->input_xfrm;
ret = 0;
} else {
@@ -1284,6 +1290,11 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
sizeof(rxfh.input_xfrm))) {
ret = -EFAULT;
} else if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, key_size),
+ &rxfh_dev.key_size,
+ sizeof(rxfh.key_size))) {
+ ret = -EFAULT;
+ } else if (copy_to_user(useraddr +
offsetof(struct ethtool_rxfh, rss_config[0]),
rss_config, total_size)) {
ret = -EFAULT;
@@ -1360,7 +1371,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ if (rxfh.rss_context && !(ops->cap_rss_ctx_supported ||
+ ops->create_rxfh_context))
return -EOPNOTSUPP;
/* Check input data transformation capabilities */
if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
@@ -1390,6 +1402,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
+ /* Check settings which may be global rather than per RSS-context */
+ if (rxfh.rss_context && !ops->rxfh_per_ctx_key)
+ if (rxfh.key_size ||
+ (rxfh.hfunc && rxfh.hfunc != ETH_RSS_HASH_NO_CHANGE) ||
+ (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_NO_CHANGE))
+ return -EOPNOTSUPP;
+
rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER);
if (!rss_config)
return -ENOMEM;
@@ -2072,8 +2091,6 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
{
struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS };
u16 from_channel, to_channel;
- u64 max_rxnfc_in_use;
- u32 max_rxfh_in_use;
unsigned int i;
int ret;
@@ -2103,14 +2120,9 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
(!channels.rx_count || !channels.tx_count))
return -EINVAL;
- /* ensure the new Rx count fits within the configured Rx flow
- * indirection table/rxnfc settings */
- if (ethtool_get_max_rxnfc_channel(dev, &max_rxnfc_in_use))
- max_rxnfc_in_use = 0;
- max_rxfh_in_use = ethtool_get_max_rxfh_channel(dev);
- if (channels.combined_count + channels.rx_count <=
- max_t(u64, max_rxnfc_in_use, max_rxfh_in_use))
- return -EINVAL;
+ ret = ethtool_check_max_channel(dev, channels, NULL);
+ if (ret)
+ return ret;
/* Disabling channels, query zero-copy AF_XDP sockets */
from_channel = channels.combined_count +
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 5c317d23787b..30b8ce275159 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -35,7 +35,7 @@ static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
- if (ret < 0 && info)
+ if (ret < 0)
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
ethnl_ops_complete(dev);
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index b2591db49f7d..259cd9ef1f2a 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -40,7 +40,7 @@ static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
return ret;
ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
- if (ret < 0 && info) {
+ if (ret < 0) {
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
goto out;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index cb1eea00e349..e3f0ef6b851b 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -2,6 +2,7 @@
#include <net/sock.h>
#include <linux/ethtool_netlink.h>
+#include <linux/phy_link_topology.h>
#include <linux/pm_runtime.h>
#include "netlink.h"
#include "module_fw.h"
@@ -31,6 +32,24 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
+const struct nla_policy ethnl_header_policy_phy[] = {
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ ETHTOOL_FLAGS_BASIC),
+ [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+const struct nla_policy ethnl_header_policy_phy_stats[] = {
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32,
+ ETHTOOL_FLAGS_STATS),
+ [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid,
enum ethnl_sock_type type)
{
@@ -119,7 +138,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
const struct nlattr *header, struct net *net,
struct netlink_ext_ack *extack, bool require_dev)
{
- struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)];
+ struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy_phy)];
const struct nlattr *devname_attr;
struct net_device *dev = NULL;
u32 flags = 0;
@@ -134,7 +153,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
/* No validation here, command policy should have a nested policy set
* for the header, therefore validation should have already been done.
*/
- ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy) - 1, header,
+ ret = nla_parse_nested(tb, ARRAY_SIZE(ethnl_header_policy_phy) - 1, header,
NULL, extack);
if (ret < 0)
return ret;
@@ -175,11 +194,45 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
return -EINVAL;
}
+ if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) {
+ if (dev) {
+ req_info->phy_index = nla_get_u32(tb[ETHTOOL_A_HEADER_PHY_INDEX]);
+ } else {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "phy_index set without a netdev");
+ return -EINVAL;
+ }
+ }
+
req_info->dev = dev;
req_info->flags = flags;
return 0;
}
+struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
+ const struct nlattr *header,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_device *phydev;
+
+ ASSERT_RTNL();
+
+ if (!req_info->dev)
+ return NULL;
+
+ if (!req_info->phy_index)
+ return req_info->dev->phydev;
+
+ phydev = phy_link_topo_get_phy(req_info->dev, req_info->phy_index);
+ if (!phydev) {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "no phy matching phyindex");
+ return ERR_PTR(-ENODEV);
+ }
+
+ return phydev;
+}
+
/**
* ethnl_fill_reply_header() - Put common header into a reply message
* @skb: skb with the message
@@ -1128,6 +1181,8 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_RSS_GET,
.doit = ethnl_default_doit,
+ .start = ethnl_rss_dump_start,
+ .dumpit = ethnl_rss_dumpit,
.policy = ethnl_rss_get_policy,
.maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
},
@@ -1179,6 +1234,15 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_module_fw_flash_act_policy,
.maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PHY_GET,
+ .doit = ethnl_phy_doit,
+ .start = ethnl_phy_start,
+ .dumpit = ethnl_phy_dumpit,
+ .done = ethnl_phy_done,
+ .policy = ethnl_phy_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 46ec273a87c5..203b08eb6c6f 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -251,6 +251,9 @@ static inline unsigned int ethnl_reply_header_size(void)
* @dev: network device the request is for (may be null)
* @dev_tracker: refcount tracker for @dev reference
* @flags: request flags common for all request types
+ * @phy_index: index of the phy_device connected to @dev that this request
+ * is for. Can be 0 if the request doesn't target a phy, or if the
+ * phy attached to @dev is targeted.
*
* This is a common base for request specific structures holding data from
* parsed userspace request. These always embed struct ethnl_req_info at
@@ -260,6 +263,7 @@ struct ethnl_req_info {
struct net_device *dev;
netdevice_tracker dev_tracker;
u32 flags;
+ u32 phy_index;
};
static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
@@ -268,6 +272,27 @@ static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info)
}
/**
+ * ethnl_req_get_phydev() - Gets the phy_device targeted by this request,
+ * if any. Must be called under rtnl_lock().
+ * @req_info: The ethnl request to get the phy from.
+ * @header: The netlink header, used for error reporting.
+ * @extack: The netlink extended ACK, for error reporting.
+ *
+ * The caller must hold RTNL, until it's done interacting with the returned
+ * phy_device.
+ *
+ * Return: The phy_device corresponding to the passed phy_index, if one
+ * is provided. If not, the phy_device attached to the net_device
+ * targeted by this request is returned. If there's no targeted
+ * net_device, or no phy_device is attached, NULL is returned. If
+ * the provided phy_index is invalid, an error pointer is returned.
+ */
+struct phy_device *ethnl_req_get_phydev(const struct ethnl_req_info *req_info,
+ const struct nlattr *header,
+ struct netlink_ext_ack *extack);
+
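A sketch of the resulting calling convention, modeled on the cable-test and PLCA conversions elsewhere in this series (NULL means "no PHY, not necessarily an error"; an ERR_PTR means the requested phy_index was invalid):

	phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PHY_HEADER],
				      info->extack);
	if (IS_ERR(phydev))
		return PTR_ERR(phydev);
	if (!phydev)
		return -EOPNOTSUPP;	/* or proceed without PHY data */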
+/**
* struct ethnl_reply_data - base type of reply data for GET requests
* @dev: device for current reply message; in single shot requests it is
* equal to &ethnl_req_info.dev; in dumps it's different for each
@@ -409,9 +434,12 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops;
extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
extern const struct ethnl_request_ops ethnl_mm_request_ops;
+extern const struct ethnl_request_ops ethnl_phy_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
+extern const struct nla_policy ethnl_header_policy_phy[ETHTOOL_A_HEADER_PHY_INDEX + 1];
+extern const struct nla_policy ethnl_header_policy_phy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1];
extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1];
extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1];
extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1];
@@ -449,13 +477,14 @@ extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER +
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
-extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1];
+extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_START_CONTEXT + 1];
extern const struct nla_policy ethnl_plca_get_cfg_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1];
extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1];
+extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1];
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
@@ -464,6 +493,12 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info);
+int ethnl_rss_dump_start(struct netlink_callback *cb);
+int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_phy_start(struct netlink_callback *cb);
+int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info);
+int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ethnl_phy_done(struct netlink_callback *cb);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c
new file mode 100644
index 000000000000..ed8f690f6bac
--- /dev/null
+++ b/net/ethtool/phy.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Bootlin
+ *
+ */
+#include "common.h"
+#include "netlink.h"
+
+#include <linux/phy.h>
+#include <linux/phy_link_topology.h>
+#include <linux/sfp.h>
+
+struct phy_req_info {
+ struct ethnl_req_info base;
+ struct phy_device_node *pdn;
+};
+
+#define PHY_REQINFO(__req_base) \
+ container_of(__req_base, struct phy_req_info, base)
+
+const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1] = {
+ [ETHTOOL_A_PHY_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+/* Caller holds rtnl */
+static ssize_t
+ethnl_phy_reply_size(const struct ethnl_req_info *req_base,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device_node *pdn = req_info->pdn;
+ struct phy_device *phydev = pdn->phy;
+ size_t size = 0;
+
+ ASSERT_RTNL();
+
+ /* ETHTOOL_A_PHY_INDEX */
+ size += nla_total_size(sizeof(u32));
+
+ /* ETHTOOL_A_DRVNAME */
+ if (phydev->drv)
+ size += nla_total_size(strlen(phydev->drv->name) + 1);
+
+ /* ETHTOOL_A_NAME */
+ size += nla_total_size(strlen(dev_name(&phydev->mdio.dev)) + 1);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_TYPE */
+ size += nla_total_size(sizeof(u32));
+
+ if (phy_on_sfp(phydev)) {
+ const char *upstream_sfp_name = sfp_get_name(pdn->parent_sfp_bus);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_SFP_NAME */
+ if (upstream_sfp_name)
+ size += nla_total_size(strlen(upstream_sfp_name) + 1);
+
+ /* ETHTOOL_A_PHY_UPSTREAM_INDEX */
+ size += nla_total_size(sizeof(u32));
+ }
+
+ /* ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME */
+ if (phydev->sfp_bus) {
+ const char *sfp_name = sfp_get_name(phydev->sfp_bus);
+
+ if (sfp_name)
+ size += nla_total_size(strlen(sfp_name) + 1);
+ }
+
+ return size;
+}
+
+static int
+ethnl_phy_fill_reply(const struct ethnl_req_info *req_base, struct sk_buff *skb)
+{
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device_node *pdn = req_info->pdn;
+ struct phy_device *phydev = pdn->phy;
+ enum phy_upstream ptype;
+
+ ptype = pdn->upstream_type;
+
+ if (nla_put_u32(skb, ETHTOOL_A_PHY_INDEX, phydev->phyindex) ||
+ nla_put_string(skb, ETHTOOL_A_PHY_NAME, dev_name(&phydev->mdio.dev)) ||
+ nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_TYPE, ptype))
+ return -EMSGSIZE;
+
+ if (phydev->drv &&
+ nla_put_string(skb, ETHTOOL_A_PHY_DRVNAME, phydev->drv->name))
+ return -EMSGSIZE;
+
+ if (ptype == PHY_UPSTREAM_PHY) {
+ struct phy_device *upstream = pdn->upstream.phydev;
+ const char *sfp_upstream_name;
+
+ /* Parent index */
+ if (nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_INDEX, upstream->phyindex))
+ return -EMSGSIZE;
+
+ if (pdn->parent_sfp_bus) {
+ sfp_upstream_name = sfp_get_name(pdn->parent_sfp_bus);
+ if (sfp_upstream_name &&
+ nla_put_string(skb, ETHTOOL_A_PHY_UPSTREAM_SFP_NAME,
+ sfp_upstream_name))
+ return -EMSGSIZE;
+ }
+ }
+
+ if (phydev->sfp_bus) {
+ const char *sfp_name = sfp_get_name(phydev->sfp_bus);
+
+ if (sfp_name &&
+ nla_put_string(skb, ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME,
+ sfp_name))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+static int ethnl_phy_parse_request(struct ethnl_req_info *req_base,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct phy_link_topology *topo = req_base->dev->link_topo;
+ struct phy_req_info *req_info = PHY_REQINFO(req_base);
+ struct phy_device *phydev;
+
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PHY_HEADER],
+ extack);
+ if (!phydev)
+ return 0;
+
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
+ if (!topo)
+ return 0;
+
+ req_info->pdn = xa_load(&topo->phys, phydev->phyindex);
+
+ return 0;
+}
+
+int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct phy_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ struct sk_buff *rskb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ ret = ethnl_parse_header_dev_get(&req_info.base,
+ tb[ETHTOOL_A_PHY_HEADER],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ rtnl_lock();
+
+ ret = ethnl_phy_parse_request(&req_info.base, tb, info->extack);
+ if (ret < 0)
+ goto err_unlock_rtnl;
+
+ /* No PHY, return early */
+ if (!req_info.pdn)
+ goto err_unlock_rtnl;
+
+ ret = ethnl_phy_reply_size(&req_info.base, info->extack);
+ if (ret < 0)
+ goto err_unlock_rtnl;
+ reply_len = ret + ethnl_reply_header_size();
+
+ rskb = ethnl_reply_init(reply_len, req_info.base.dev,
+ ETHTOOL_MSG_PHY_GET_REPLY,
+ ETHTOOL_A_PHY_HEADER,
+ info, &reply_payload);
+ if (!rskb) {
+ ret = -ENOMEM;
+ goto err_unlock_rtnl;
+ }
+
+ ret = ethnl_phy_fill_reply(&req_info.base, rskb);
+ if (ret)
+ goto err_free_msg;
+
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info.base);
+ genlmsg_end(rskb, reply_payload);
+
+ return genlmsg_reply(rskb, info);
+
+err_free_msg:
+ nlmsg_free(rskb);
+err_unlock_rtnl:
+ rtnl_unlock();
+ ethnl_parse_header_dev_put(&req_info.base);
+ return ret;
+}
+
+struct ethnl_phy_dump_ctx {
+ struct phy_req_info *phy_req_info;
+ unsigned long ifindex;
+ unsigned long phy_index;
+};
+
+int ethnl_phy_start(struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ int ret;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ ctx->phy_req_info = kzalloc(sizeof(*ctx->phy_req_info), GFP_KERNEL);
+ if (!ctx->phy_req_info)
+ return -ENOMEM;
+
+ ret = ethnl_parse_header_dev_get(&ctx->phy_req_info->base,
+ info->attrs[ETHTOOL_A_PHY_HEADER],
+ sock_net(cb->skb->sk), cb->extack,
+ false);
+ ctx->ifindex = 0;
+ ctx->phy_index = 0;
+
+ if (ret)
+ kfree(ctx->phy_req_info);
+
+ return ret;
+}
+
+int ethnl_phy_done(struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+
+ if (ctx->phy_req_info->base.dev)
+ ethnl_parse_header_dev_put(&ctx->phy_req_info->base);
+
+ kfree(ctx->phy_req_info);
+
+ return 0;
+}
+
+static int ethnl_phy_dump_one_dev(struct sk_buff *skb, struct net_device *dev,
+ struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ struct phy_req_info *pri = ctx->phy_req_info;
+ struct phy_device_node *pdn;
+ int ret = 0;
+ void *ehdr;
+
+ if (!dev->link_topo)
+ return 0;
+
+ xa_for_each_start(&dev->link_topo->phys, ctx->phy_index, pdn, ctx->phy_index) {
+ ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_PHY_GET_REPLY);
+ if (!ehdr) {
+ ret = -EMSGSIZE;
+ break;
+ }
+
+ ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_PHY_HEADER);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
+ }
+
+ pri->pdn = pdn;
+ ret = ethnl_phy_fill_reply(&pri->base, skb);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ break;
+ }
+
+ genlmsg_end(skb, ehdr);
+ }
+
+ return ret;
+}
+
+int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+
+ if (ctx->phy_req_info->base.dev) {
+ ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb);
+ } else {
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ ret = ethnl_phy_dump_one_dev(skb, dev, cb);
+ if (ret)
+ break;
+
+ ctx->phy_index = 0;
+ }
+ }
+ rtnl_unlock();
+
+ return ret;
+}
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
index b1e2e3b5027f..d95d92f173a6 100644
--- a/net/ethtool/plca.c
+++ b/net/ethtool/plca.c
@@ -25,7 +25,7 @@ struct plca_reply_data {
const struct nla_policy ethnl_plca_get_cfg_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid,
@@ -58,10 +58,14 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -80,7 +84,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
memset(&data->plca_cfg, 0xff,
sizeof_field(struct plca_reply_data, plca_cfg));
- ret = ops->get_plca_cfg(dev->phydev, &data->plca_cfg);
+ ret = ops->get_plca_cfg(phydev, &data->plca_cfg);
ethnl_ops_complete(dev);
out:
@@ -129,7 +133,7 @@ static int plca_get_cfg_fill_reply(struct sk_buff *skb,
const struct nla_policy ethnl_plca_set_cfg_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255),
[ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255),
@@ -141,15 +145,17 @@ const struct nla_policy ethnl_plca_set_cfg_policy[] = {
static int
ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
const struct ethtool_phy_ops *ops;
struct nlattr **tb = info->attrs;
struct phy_plca_cfg plca_cfg;
+ struct phy_device *phydev;
bool mod = false;
int ret;
+ phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev)
+ if (IS_ERR_OR_NULL(phydev))
return -EOPNOTSUPP;
ops = ethtool_phy_ops;
@@ -168,7 +174,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
if (!mod)
return 0;
- ret = ops->set_plca_cfg(dev->phydev, &plca_cfg, info->extack);
+ ret = ops->set_plca_cfg(phydev, &plca_cfg, info->extack);
return ret < 0 ? ret : 1;
}
@@ -191,7 +197,7 @@ const struct ethnl_request_ops ethnl_plca_cfg_request_ops = {
const struct nla_policy ethnl_plca_get_status_policy[] = {
[ETHTOOL_A_PLCA_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
@@ -201,10 +207,14 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
struct plca_reply_data *data = PLCA_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER],
+ info->extack);
// check that the PHY device is available and connected
- if (!dev->phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -223,7 +233,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
memset(&data->plca_st, 0xff,
sizeof_field(struct plca_reply_data, plca_st));
- ret = ops->get_plca_status(dev->phydev, &data->plca_st);
+ ret = ops->get_plca_status(phydev, &data->plca_st);
ethnl_ops_complete(dev);
out:
return ret;
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index ff81aa749784..a0705edca22a 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -28,17 +28,15 @@ struct pse_reply_data {
/* PSE_GET */
const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1] = {
- [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
};
-static int pse_get_pse_attributes(struct net_device *dev,
+static int pse_get_pse_attributes(struct phy_device *phydev,
struct netlink_ext_ack *extack,
struct pse_reply_data *data)
{
- struct phy_device *phydev = dev->phydev;
-
if (!phydev) {
- NL_SET_ERR_MSG(extack, "No PHY is attached");
+ NL_SET_ERR_MSG(extack, "No PHY found");
return -EOPNOTSUPP;
}
@@ -58,13 +56,20 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base,
{
struct pse_reply_data *data = PSE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = pse_get_pse_attributes(dev, info->extack, data);
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PSE_HEADER],
+ info->extack);
+ if (IS_ERR(phydev))
+ return -ENODEV;
+
+ ret = pse_get_pse_attributes(phydev, info->extack, data);
ethnl_ops_complete(dev);
@@ -206,7 +211,7 @@ static void pse_cleanup_data(struct ethnl_reply_data *reply_base)
/* PSE_SET */
const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
- [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] =
NLA_POLICY_RANGE(NLA_U32, ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
@@ -217,14 +222,11 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
};
static int
-ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+ethnl_set_pse_validate(struct phy_device *phydev, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct phy_device *phydev;
- phydev = dev->phydev;
- if (!phydev) {
+ if (IS_ERR_OR_NULL(phydev)) {
NL_SET_ERR_MSG(info->extack, "No PHY is attached");
return -EOPNOTSUPP;
}
@@ -249,18 +251,21 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
return -EOPNOTSUPP;
}
- return 1;
+ return 0;
}
static int
ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
struct phy_device *phydev;
- int ret = 0;
+ int ret;
- phydev = dev->phydev;
+ phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PSE_HEADER],
+ info->extack);
+ ret = ethnl_set_pse_validate(phydev, info);
+ if (ret)
+ return ret;
if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) {
unsigned int pw_limit;
@@ -307,7 +312,6 @@ const struct ethnl_request_ops ethnl_pse_request_ops = {
.fill_reply = pse_fill_reply,
.cleanup_data = pse_cleanup_data,
- .set_validate = ethnl_set_pse_validate,
.set = ethnl_set_pse,
/* PSE has no notification */
};
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 5c4c4505ab9a..e07386275e14 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -10,6 +10,7 @@ struct rss_req_info {
struct rss_reply_data {
struct ethnl_reply_data base;
+ bool no_key_fields;
u32 indir_size;
u32 hkey_size;
u32 hfunc;
@@ -27,6 +28,7 @@ struct rss_reply_data {
const struct nla_policy ethnl_rss_get_policy[] = {
[ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
[ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 },
+ [ETHTOOL_A_RSS_START_CONTEXT] = { .type = NLA_U32 },
};
static int
@@ -37,18 +39,18 @@ rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb,
if (tb[ETHTOOL_A_RSS_CONTEXT])
request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]);
+ if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
+ NL_SET_BAD_ATTR(extack, tb[ETHTOOL_A_RSS_START_CONTEXT]);
+ return -EINVAL;
+ }
return 0;
}
static int
-rss_prepare_data(const struct ethnl_req_info *req_base,
- struct ethnl_reply_data *reply_base,
- const struct genl_info *info)
+rss_prepare_get(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
{
- struct rss_reply_data *data = RSS_REPDATA(reply_base);
- struct rss_req_info *request = RSS_REQINFO(req_base);
- struct net_device *dev = reply_base->dev;
struct ethtool_rxfh_param rxfh = {};
const struct ethtool_ops *ops;
u32 total_size, indir_bytes;
@@ -56,12 +58,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
int ret;
ops = dev->ethtool_ops;
- if (!ops->get_rxfh)
- return -EOPNOTSUPP;
-
- /* Some drivers don't handle rss_context */
- if (request->rss_context && !ops->cap_rss_ctx_supported)
- return -EOPNOTSUPP;
ret = ethnl_ops_begin(dev);
if (ret < 0)
@@ -91,7 +87,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
rxfh.indir = data->indir_table;
rxfh.key_size = data->hkey_size;
rxfh.key = data->hkey;
- rxfh.rss_context = request->rss_context;
ret = ops->get_rxfh(dev, &rxfh);
if (ret)
@@ -105,6 +100,67 @@ out_ops:
}
static int
+rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev,
+ struct rss_reply_data *data, const struct genl_info *info)
+{
+ struct ethtool_rxfh_context *ctx;
+ u32 total_size, indir_bytes;
+ u8 *rss_config;
+
+ ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context);
+ if (!ctx)
+ return -ENOENT;
+
+ data->indir_size = ctx->indir_size;
+ data->hkey_size = ctx->key_size;
+ data->hfunc = ctx->hfunc;
+ data->input_xfrm = ctx->input_xfrm;
+
+ indir_bytes = data->indir_size * sizeof(u32);
+ total_size = indir_bytes + data->hkey_size;
+ rss_config = kzalloc(total_size, GFP_KERNEL);
+ if (!rss_config)
+ return -ENOMEM;
+
+ data->indir_table = (u32 *)rss_config;
+ memcpy(data->indir_table, ethtool_rxfh_context_indir(ctx), indir_bytes);
+
+ if (data->hkey_size) {
+ data->hkey = rss_config + indir_bytes;
+ memcpy(data->hkey, ethtool_rxfh_context_key(ctx),
+ data->hkey_size);
+ }
+
+ return 0;
+}
+
+static int
+rss_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ const struct genl_info *info)
+{
+ struct rss_reply_data *data = RSS_REPDATA(reply_base);
+ struct rss_req_info *request = RSS_REQINFO(req_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_ops *ops;
+
+ ops = dev->ethtool_ops;
+ if (!ops->get_rxfh)
+ return -EOPNOTSUPP;
+
+ /* Some drivers don't handle rss_context */
+ if (request->rss_context) {
+ if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context)
+ return -EOPNOTSUPP;
+
+ data->no_key_fields = !ops->rxfh_per_ctx_key;
+ return rss_prepare_ctx(request, dev, data, info);
+ }
+
+ return rss_prepare_get(request, dev, data, info);
+}
+
+static int
rss_reply_size(const struct ethnl_req_info *req_base,
const struct ethnl_reply_data *reply_base)
{
@@ -131,13 +187,18 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context))
return -EMSGSIZE;
+ if ((data->indir_size &&
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table)))
+ return -EMSGSIZE;
+
+ if (data->no_key_fields)
+ return 0;
+
if ((data->hfunc &&
nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
(data->input_xfrm &&
nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
- (data->indir_size &&
- nla_put(skb, ETHTOOL_A_RSS_INDIR,
- sizeof(u32) * data->indir_size, data->indir_table)) ||
(data->hkey_size &&
nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
return -EMSGSIZE;
@@ -152,6 +213,146 @@ static void rss_cleanup_data(struct ethnl_reply_data *reply_base)
kfree(data->indir_table);
}
+struct rss_nl_dump_ctx {
+ unsigned long ifindex;
+ unsigned long ctx_idx;
+
+ /* User wants to only dump contexts from a given ifindex */
+ unsigned int match_ifindex;
+ unsigned int start_ctx;
+};
+
+static struct rss_nl_dump_ctx *rss_dump_ctx(struct netlink_callback *cb)
+{
+ NL_ASSERT_DUMP_CTX_FITS(struct rss_nl_dump_ctx);
+
+ return (struct rss_nl_dump_ctx *)cb->ctx;
+}
+
+int ethnl_rss_dump_start(struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ struct ethnl_req_info req_info = {};
+ struct nlattr **tb = info->attrs;
+ int ret;
+
+ /* Filtering by context not supported */
+ if (tb[ETHTOOL_A_RSS_CONTEXT]) {
+ NL_SET_BAD_ATTR(info->extack, tb[ETHTOOL_A_RSS_CONTEXT]);
+ return -EINVAL;
+ }
+ if (tb[ETHTOOL_A_RSS_START_CONTEXT]) {
+ ctx->start_ctx = nla_get_u32(tb[ETHTOOL_A_RSS_START_CONTEXT]);
+ ctx->ctx_idx = ctx->start_ctx;
+ }
+
+ ret = ethnl_parse_header_dev_get(&req_info,
+ tb[ETHTOOL_A_RSS_HEADER],
+ sock_net(cb->skb->sk), cb->extack,
+ false);
+ if (req_info.dev) {
+ ctx->match_ifindex = req_info.dev->ifindex;
+ ctx->ifindex = ctx->match_ifindex;
+ ethnl_parse_header_dev_put(&req_info);
+ req_info.dev = NULL;
+ }
+
+ return ret;
+}
+
+static int
+rss_dump_one_ctx(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev, u32 rss_context)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ struct rss_reply_data data = {};
+ struct rss_req_info req = {};
+ void *ehdr;
+ int ret;
+
+ req.rss_context = rss_context;
+
+ ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_RSS_GET_REPLY);
+ if (!ehdr)
+ return -EMSGSIZE;
+
+ ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_RSS_HEADER);
+ if (ret < 0)
+ goto err_cancel;
+
+ /* Context 0 is not currently stored or cached in the XArray */
+ if (!rss_context)
+ ret = rss_prepare_get(&req, dev, &data, info);
+ else
+ ret = rss_prepare_ctx(&req, dev, &data, info);
+ if (ret)
+ goto err_cancel;
+
+ ret = rss_fill_reply(skb, &req.base, &data.base);
+ if (ret)
+ goto err_cleanup;
+ genlmsg_end(skb, ehdr);
+
+ rss_cleanup_data(&data.base);
+ return 0;
+
+err_cleanup:
+ rss_cleanup_data(&data.base);
+err_cancel:
+ genlmsg_cancel(skb, ehdr);
+ return ret;
+}
+
+static int
+rss_dump_one_dev(struct sk_buff *skb, struct netlink_callback *cb,
+ struct net_device *dev)
+{
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ int ret;
+
+ if (!dev->ethtool_ops->get_rxfh)
+ return 0;
+
+ if (!ctx->ctx_idx) {
+ ret = rss_dump_one_ctx(skb, cb, dev, 0);
+ if (ret)
+ return ret;
+ ctx->ctx_idx++;
+ }
+
+ for (; xa_find(&dev->ethtool->rss_ctx, &ctx->ctx_idx,
+ ULONG_MAX, XA_PRESENT); ctx->ctx_idx++) {
+ ret = rss_dump_one_ctx(skb, cb, dev, ctx->ctx_idx);
+ if (ret)
+ return ret;
+ }
+ ctx->ctx_idx = ctx->start_ctx;
+
+ return 0;
+}
+
+int ethnl_rss_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rss_nl_dump_ctx *ctx = rss_dump_ctx(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ int ret = 0;
+
+ rtnl_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ if (ctx->match_ifindex && ctx->match_ifindex != ctx->ifindex)
+ break;
+
+ ret = rss_dump_one_dev(skb, cb, dev);
+ if (ret)
+ break;
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+
const struct ethnl_request_ops ethnl_rss_request_ops = {
.request_cmd = ETHTOOL_MSG_RSS_GET,
.reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY,
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index c678b484a079..b3382b3cf325 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -126,7 +126,7 @@ struct strset_reply_data {
const struct nla_policy ethnl_strset_get_policy[] = {
[ETHTOOL_A_STRSET_HEADER] =
- NLA_POLICY_NESTED(ethnl_header_policy),
+ NLA_POLICY_NESTED(ethnl_header_policy_phy),
[ETHTOOL_A_STRSET_STRINGSETS] = { .type = NLA_NESTED },
[ETHTOOL_A_STRSET_COUNTS_ONLY] = { .type = NLA_FLAG },
};
@@ -233,17 +233,18 @@ static void strset_cleanup_data(struct ethnl_reply_data *reply_base)
}
static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
- unsigned int id, bool counts_only)
+ struct phy_device *phydev, unsigned int id,
+ bool counts_only)
{
const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
const struct ethtool_ops *ops = dev->ethtool_ops;
void *strings;
int count, ret;
- if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ if (id == ETH_SS_PHY_STATS && phydev &&
!ops->get_ethtool_phy_stats && phy_ops &&
phy_ops->get_sset_count)
- ret = phy_ops->get_sset_count(dev->phydev);
+ ret = phy_ops->get_sset_count(phydev);
else if (ops->get_sset_count && ops->get_strings)
ret = ops->get_sset_count(dev, id);
else
@@ -258,10 +259,10 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
strings = kcalloc(count, ETH_GSTRING_LEN, GFP_KERNEL);
if (!strings)
return -ENOMEM;
- if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ if (id == ETH_SS_PHY_STATS && phydev &&
!ops->get_ethtool_phy_stats && phy_ops &&
phy_ops->get_strings)
- phy_ops->get_strings(dev->phydev, strings);
+ phy_ops->get_strings(phydev, strings);
else
ops->get_strings(dev, id, strings);
info->strings = strings;
@@ -279,6 +280,8 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
struct strset_reply_data *data = STRSET_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
unsigned int i;
int ret;
@@ -289,14 +292,20 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
for (i = 0; i < ETH_SS_COUNT; i++) {
if ((req_info->req_ids & (1U << i)) &&
data->sets[i].per_dev) {
- if (info)
- GENL_SET_ERR_MSG(info, "requested per device strings without dev");
+ GENL_SET_ERR_MSG(info, "requested per device strings without dev");
return -EINVAL;
}
}
return 0;
}
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_HEADER_FLAGS],
+ info->extack);
+
+ /* phydev can be NULL, check for errors only */
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
goto err_strset;
@@ -305,7 +314,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base,
!data->sets[i].per_dev)
continue;
- ret = strset_prepare_set(&data->sets[i], dev, i,
+ ret = strset_prepare_set(&data->sets[i], dev, phydev, i,
req_info->counts_only);
if (ret < 0)
goto err_ops;
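The strset change above leans on a common kernel convention: ethnl_req_get_phydev() can return a valid pointer, NULL ("no PHY attached, and that is acceptable"), or an errno encoded into the pointer, which is why the caller checks IS_ERR() but deliberately tolerates NULL. A self-contained sketch of that three-way convention; the ERR_PTR/IS_ERR/PTR_ERR helpers are re-implemented below for illustration (in the kernel they live in <linux/err.h>), and get_phydev() is a stand-in, not the real function:

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define MAX_ERRNO 4095
static void *ERR_PTR(long error) { return (void *)(intptr_t)error; }
static long PTR_ERR(const void *ptr) { return (long)(intptr_t)ptr; }
static int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct phy_device { const char *name; };

/* mode: 0 = no PHY attached, 1 = PHY present, 2 = malformed request */
static struct phy_device *get_phydev(int mode)
{
	static struct phy_device phy = { "phy0" };

	if (mode == 2)
		return ERR_PTR(-EINVAL);	/* hard error: abort the request */
	if (mode == 0)
		return NULL;			/* no PHY: fall back to MAC ops */
	return &phy;
}

int main(void)
{
	for (int mode = 0; mode <= 2; mode++) {
		struct phy_device *phydev = get_phydev(mode);

		if (IS_ERR(phydev))
			printf("mode=%d -> error %ld\n", mode, PTR_ERR(phydev));
		else if (!phydev)
			printf("mode=%d -> no PHY, using MAC stats\n", mode);
		else
			printf("mode=%d -> %s\n", mode, phydev->name);
	}
	return 0;
}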
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
index 89637e732866..7e46d130dce2 100644
--- a/net/handshake/netlink.c
+++ b/net/handshake/netlink.c
@@ -153,7 +153,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
if (!req) {
err = -EBUSY;
trace_handshake_cmd_done_err(net, req, sock->sk, err);
- fput(sock->file);
+ sockfd_put(sock);
return err;
}
@@ -164,7 +164,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]);
handshake_complete(req, status, info);
- fput(sock->file);
+ sockfd_put(sock);
return 0;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 049e22bdaafb..ebdfd5b64e17 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -231,7 +231,9 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb->dev = master->dev;
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
+ spin_lock_bh(&hsr->seqnr_lock);
hsr_forward_skb(skb, master);
+ spin_unlock_bh(&hsr->seqnr_lock);
} else {
dev_core_stats_tx_dropped_inc(dev);
dev_kfree_skb_any(skb);
@@ -312,10 +314,14 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
/* From HSRv1 on we have separate supervision sequence numbers. */
- if (hsr->prot_version > 0)
- hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
- else
- hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sequence_nr));
+ spin_lock_bh(&hsr->seqnr_lock);
+ if (hsr->prot_version > 0) {
+ hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+ hsr->sup_sequence_nr++;
+ } else {
+ hsr_stag->sequence_nr = htons(hsr->sequence_nr);
+ hsr->sequence_nr++;
+ }
hsr_stag->tlv.HSR_TLV_type = type;
/* TODO: Why 12 in HSRv0? */
@@ -337,11 +343,13 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox);
}
- if (skb_put_padto(skb, ETH_ZLEN))
+ if (skb_put_padto(skb, ETH_ZLEN)) {
+ spin_unlock_bh(&hsr->seqnr_lock);
return;
+ }
hsr_forward_skb(skb, port);
-
+ spin_unlock_bh(&hsr->seqnr_lock);
return;
}
@@ -366,7 +374,9 @@ static void send_prp_supervision_frame(struct hsr_port *master,
set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0));
/* From HSRv1 on we have separate supervision sequence numbers. */
- hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
+ spin_lock_bh(&hsr->seqnr_lock);
+ hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
+ hsr->sup_sequence_nr++;
hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload);
@@ -374,10 +384,13 @@ static void send_prp_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
- if (skb_put_padto(skb, ETH_ZLEN))
+ if (skb_put_padto(skb, ETH_ZLEN)) {
+ spin_unlock_bh(&hsr->seqnr_lock);
return;
+ }
hsr_forward_skb(skb, master);
+ spin_unlock_bh(&hsr->seqnr_lock);
}
/* Announce (supervision frame) timer function
@@ -545,6 +558,12 @@ void hsr_dev_setup(struct net_device *dev)
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
dev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL;
+ /* Prevent recursive tx locking */
+ dev->lltx = true;
+ /* Not sure about this. Taken from bridge code. netdevice.h says
+ * it means "Does not change network namespaces".
+ */
+ dev->netns_local = true;
dev->needs_free_netdev = true;
@@ -554,16 +573,10 @@ void hsr_dev_setup(struct net_device *dev)
dev->features = dev->hw_features;
- /* Prevent recursive tx locking */
- dev->features |= NETIF_F_LLTX;
/* VLAN on top of HSR needs testing and probably some work on
* hsr_header_create() etc.
*/
dev->features |= NETIF_F_VLAN_CHALLENGED;
- /* Not sure about this. Taken from bridge code. netdev_features.h says
- * it means "Does not change network namespaces".
- */
- dev->features |= NETIF_F_NETNS_LOCAL;
}
/* Return true if dev is a HSR master; return false otherwise.
@@ -612,9 +625,10 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res < 0)
return res;
+ spin_lock_init(&hsr->seqnr_lock);
/* Overflow soon to find bugs easier: */
- atomic_set(&hsr->sequence_nr, HSR_SEQNR_START);
- atomic_set(&hsr->sup_sequence_nr, HSR_SUP_SEQNR_START);
+ hsr->sequence_nr = HSR_SEQNR_START;
+ hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
timer_setup(&hsr->announce_timer, hsr_announce, 0);
timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 6f63c8a775c4..b38060246e62 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -599,7 +599,9 @@ static void handle_std_frame(struct sk_buff *skb,
if (port->type == HSR_PT_MASTER ||
port->type == HSR_PT_INTERLINK) {
/* Sequence nr for the master/interlink node */
- frame->sequence_nr = atomic_inc_return(&hsr->sequence_nr);
+ lockdep_assert_held(&hsr->seqnr_lock);
+ frame->sequence_nr = hsr->sequence_nr;
+ hsr->sequence_nr++;
}
}
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 6f7bbf01f3e4..fcfeb79bb040 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -202,9 +202,10 @@ struct hsr_priv {
struct timer_list prune_timer;
struct timer_list prune_proxy_timer;
int announce_count;
- atomic_t sequence_nr;
- atomic_t sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
+ u16 sequence_nr;
+ u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
+ spinlock_t seqnr_lock; /* locking for sequence_nr */
spinlock_t list_lock; /* locking for node list */
struct hsr_proto_ops *proto_ops;
#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 8aea4ff5f49e..f6ff0b61e08a 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -163,7 +163,7 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN,
hsr->sup_multicast_addr) ||
- nla_put_u16(skb, IFLA_HSR_SEQ_NR, atomic_read(&hsr->sequence_nr)))
+ nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr))
goto nla_put_failure;
if (hsr->prot_version == PRP_V1)
proto = HSR_PROTOCOL_PRP;
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index af6cf64a00e0..464f683e016d 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -67,7 +67,16 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
skb_set_network_header(skb, ETH_HLEN + HSR_HLEN);
skb_reset_mac_len(skb);
- hsr_forward_skb(skb, port);
+	/* Only frames received on the interlink port are assigned a sequence
+	 * number here, so only they need synchronisation against other senders.
+ */
+ if (port->type == HSR_PT_INTERLINK) {
+ spin_lock_bh(&hsr->seqnr_lock);
+ hsr_forward_skb(skb, port);
+ spin_unlock_bh(&hsr->seqnr_lock);
+ } else {
+ hsr_forward_skb(skb, port);
+ }
finish_consume:
return RX_HANDLER_CONSUMED;
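The HSR changes above replace atomic_inc_return() on an atomic_t with a plain u16 guarded by the new hsr->seqnr_lock. As the diff shows, the lock is held across both the sequence-number assignment and hsr_forward_skb(), so numbering and transmission on the redundant ports can no longer interleave between concurrent senders; a bare atomic only made each number unique. A compressed model of the fixed pattern, using a pthread mutex in place of the BH-disabling spinlock (illustrative only):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t seqnr_lock = PTHREAD_MUTEX_INITIALIZER;
static uint16_t sequence_nr;	/* plain integer; the lock provides atomicity */

/* Model of hsr_forward_skb(): number the frame, send it on both ports. */
static void forward_frame(const char *what)
{
	pthread_mutex_lock(&seqnr_lock);
	uint16_t seq = sequence_nr++;	/* wraps at 0xffff, as a u16 does */
	printf("%s: seq %u on port A\n", what, seq);
	printf("%s: seq %u on port B\n", what, seq);
	pthread_mutex_unlock(&seqnr_lock);
}

static void *tx_thread(void *arg)
{
	for (int i = 0; i < 3; i++)
		forward_frame((const char *)arg);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, tx_thread, (void *)"data");
	pthread_create(&b, NULL, tx_thread, (void *)"supervision");
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

With the lock held over the whole forward path, both ports always see a given sequence number as one back-to-back pair, never split by another sender's frame.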
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 77b4e92027c5..175efd860f7b 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -116,7 +116,7 @@ static void lowpan_setup(struct net_device *ldev)
ldev->netdev_ops = &lowpan_netdev_ops;
ldev->header_ops = &lowpan_header_ops;
ldev->needs_free_netdev = true;
- ldev->features |= NETIF_F_NETNS_LOCAL;
+ ldev->netns_local = true;
}
static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[],
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 60e8fff1347e..88adb04e4072 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -226,11 +226,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
list_for_each_entry(wpan_dev, &rdev->wpan_dev_list, list) {
if (!wpan_dev->netdev)
continue;
- wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = false;
err = dev_change_net_namespace(wpan_dev->netdev, net, "wpan%d");
if (err)
break;
- wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = true;
}
if (err) {
@@ -242,11 +242,11 @@ int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
list) {
if (!wpan_dev->netdev)
continue;
- wpan_dev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = false;
err = dev_change_net_namespace(wpan_dev->netdev, net,
"wpan%d");
WARN_ON(err);
- wpan_dev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wpan_dev->netdev->netns_local = true;
}
return err;
@@ -291,7 +291,7 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
switch (state) {
/* TODO NETDEV_DEVTYPE */
case NETDEV_REGISTER:
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
wpan_dev->identifier = ++rdev->wpan_dev_id;
list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list);
rdev->devlist_generation++;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8e94ed7c56a0..6d2c97f8e9ef 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -661,7 +661,8 @@ config TCP_CONG_CDG
For further details see:
D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
- delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
+ delay gradients." In Networking 2011. Preprint:
+ http://caia.swin.edu.au/cv/dahayes/content/networking2011-cdg-preprint.pdf
config TCP_CONG_BBR
tristate "BBR TCP"
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d96f3e452fef..ab76744383cf 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -216,17 +216,27 @@ static void devinet_sysctl_unregister(struct in_device *idev)
/* Locks all the inet devices. */
-static struct in_ifaddr *inet_alloc_ifa(void)
+static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
{
- return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
+ struct in_ifaddr *ifa;
+
+ ifa = kzalloc(sizeof(*ifa), GFP_KERNEL_ACCOUNT);
+ if (!ifa)
+ return NULL;
+
+ in_dev_hold(in_dev);
+ ifa->ifa_dev = in_dev;
+
+ INIT_HLIST_NODE(&ifa->hash);
+
+ return ifa;
}
static void inet_rcu_free_ifa(struct rcu_head *head)
{
struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
- if (ifa->ifa_dev)
- in_dev_put(ifa->ifa_dev);
+ in_dev_put(ifa->ifa_dev);
kfree(ifa);
}
@@ -574,17 +584,9 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
ASSERT_RTNL();
- if (!in_dev) {
- inet_free_ifa(ifa);
- return -ENOBUFS;
- }
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
- if (ifa->ifa_dev != in_dev) {
- WARN_ON(ifa->ifa_dev);
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
- }
+
if (ipv4_is_loopback(ifa->ifa_local))
ifa->ifa_scope = RT_SCOPE_HOST;
return inet_insert_ifa(ifa);
@@ -701,8 +703,6 @@ errout:
return err;
}
-#define INFINITY_LIFE_TIME 0xFFFFFFFF
-
static void check_lifetime(struct work_struct *work)
{
unsigned long now, next, next_sec, next_sched;
@@ -875,7 +875,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
if (!in_dev)
goto errout;
- ifa = inet_alloc_ifa();
+ ifa = inet_alloc_ifa(in_dev);
if (!ifa)
/*
* A potential indev allocation can be left alive, it stays
@@ -885,19 +885,15 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ipv4_devconf_setall(in_dev);
neigh_parms_data_state_setall(in_dev->arp_parms);
- in_dev_hold(in_dev);
if (!tb[IFA_ADDRESS])
tb[IFA_ADDRESS] = tb[IFA_LOCAL];
- INIT_HLIST_NODE(&ifa->hash);
ifa->ifa_prefixlen = ifm->ifa_prefixlen;
ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
ifm->ifa_flags;
ifa->ifa_scope = ifm->ifa_scope;
- ifa->ifa_dev = in_dev;
-
ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
@@ -1184,10 +1180,12 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
if (!ifa) {
ret = -ENOBUFS;
- ifa = inet_alloc_ifa();
+ if (!in_dev)
+ break;
+ ifa = inet_alloc_ifa(in_dev);
if (!ifa)
break;
- INIT_HLIST_NODE(&ifa->hash);
+
if (colon)
memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
else
@@ -1586,16 +1584,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (!inetdev_valid_mtu(dev->mtu))
break;
if (dev->flags & IFF_LOOPBACK) {
- struct in_ifaddr *ifa = inet_alloc_ifa();
+ struct in_ifaddr *ifa = inet_alloc_ifa(in_dev);
if (ifa) {
- INIT_HLIST_NODE(&ifa->hash);
ifa->ifa_local =
ifa->ifa_address = htonl(INADDR_LOOPBACK);
ifa->ifa_prefixlen = 8;
ifa->ifa_mask = inet_make_mask(8);
- in_dev_hold(in_dev);
- ifa->ifa_dev = in_dev;
ifa->ifa_scope = RT_SCOPE_HOST;
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
@@ -1948,8 +1943,7 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
}
static size_t inet_get_link_af_size(const struct net_device *dev,
@@ -2145,8 +2139,7 @@ void inet_netconf_notify_devconf(struct net *net, int event, int type,
rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
}
static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
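The devinet refactor above folds in_dev_hold() and the ifa_dev assignment into inet_alloc_ifa() itself, so an in_ifaddr can never exist without a counted reference to its in_device, and the RCU free path may drop the NULL check before in_dev_put(). A small refcounting sketch of the same "constructor takes the reference, destructor drops it" idiom; types and counters below are stand-ins, not the kernel structures:

#include <stdio.h>
#include <stdlib.h>

struct in_device { int refcnt; };

struct in_ifaddr { struct in_device *ifa_dev; };

static void in_dev_hold(struct in_device *d) { d->refcnt++; }
static void in_dev_put(struct in_device *d)  { d->refcnt--; }

/* Allocation and reference acquisition are a single step. */
static struct in_ifaddr *inet_alloc_ifa_model(struct in_device *in_dev)
{
	struct in_ifaddr *ifa = calloc(1, sizeof(*ifa));

	if (!ifa)
		return NULL;
	in_dev_hold(in_dev);
	ifa->ifa_dev = in_dev;
	return ifa;
}

static void inet_free_ifa_model(struct in_ifaddr *ifa)
{
	in_dev_put(ifa->ifa_dev);	/* ifa_dev can no longer be NULL */
	free(ifa);
}

int main(void)
{
	struct in_device dev = { .refcnt = 1 };
	struct in_ifaddr *ifa = inet_alloc_ifa_model(&dev);

	printf("after alloc: refcnt=%d\n", dev.refcnt);	/* 2 */
	inet_free_ifa_model(ifa);
	printf("after free:  refcnt=%d\n", dev.refcnt);	/* 1 */
	return 0;
}

The payoff is visible in the callers: rtm_to_ifaddr(), devinet_ioctl() and inetdev_event() all lose their separate hold/assign/INIT_HLIST_NODE boilerplate.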
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 47378ca41904..f3281312eb5e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -115,7 +115,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(sg_page(sg), skb->pp_recycle);
+ skb_page_unref(page_to_netmem(sg_page(sg)),
+ skb->pp_recycle);
}
#ifdef CONFIG_INET_ESPINTCP
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7ad2cafb9276..793e6781399a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -293,7 +293,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
+ .flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK,
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
@@ -1343,7 +1343,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
struct flowi4 fl4 = {
.flowi4_mark = frn->fl_mark,
.daddr = frn->fl_addr,
- .flowi4_tos = frn->fl_tos,
+ .flowi4_tos = frn->fl_tos & INET_DSCP_MASK,
.flowi4_scope = frn->fl_scope,
};
struct fib_table *tb;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 5bdd1c016009..b07292d50ee7 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -37,6 +37,7 @@ struct fib4_rule {
u8 dst_len;
u8 src_len;
dscp_t dscp;
+ u8 dscp_full:1; /* DSCP or TOS selector */
__be32 src;
__be32 srcmask;
__be32 dst;
@@ -186,7 +187,15 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
- if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ /* When DSCP selector is used we need to match on the entire DSCP field
+ * in the flow information structure. When TOS selector is used we need
+ * to mask the upper three DSCP bits prior to matching to maintain
+ * legacy behavior.
+ */
+ if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ return 0;
+ else if (!r->dscp_full && r->dscp &&
+ !fib_dscp_masked_match(r->dscp, fl4))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
@@ -217,6 +226,20 @@ static struct fib_table *fib_empty_table(struct net *net)
return NULL;
}
+static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4,
+ struct netlink_ext_ack *extack)
+{
+ if (rule4->dscp) {
+ NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
+ return -EINVAL;
+ }
+
+ rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule4->dscp_full = true;
+
+ return 0;
+}
+
static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -238,6 +261,10 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule4->dscp = inet_dsfield_to_dscp(frh->tos);
+ if (tb[FRA_DSCP] &&
+ fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0)
+ goto errout;
+
/* split local/main if they are not already split */
err = fib_unmerge(net);
if (err)
@@ -320,9 +347,19 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule4->dst_len != frh->dst_len))
return 0;
- if (frh->tos && inet_dscp_to_dsfield(rule4->dscp) != frh->tos)
+ if (frh->tos &&
+ (rule4->dscp_full ||
+ inet_dscp_to_dsfield(rule4->dscp) != frh->tos))
return 0;
+ if (tb[FRA_DSCP]) {
+ dscp_t dscp;
+
+ dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
+ if (!rule4->dscp_full || rule4->dscp != dscp)
+ return 0;
+ }
+
#ifdef CONFIG_IP_ROUTE_CLASSID
if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
return 0;
@@ -344,7 +381,15 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule4->dst_len;
frh->src_len = rule4->src_len;
- frh->tos = inet_dscp_to_dsfield(rule4->dscp);
+
+ if (rule4->dscp_full) {
+ frh->tos = 0;
+ if (nla_put_u8(skb, FRA_DSCP,
+ inet_dscp_to_dsfield(rule4->dscp) >> 2))
+ goto nla_put_failure;
+ } else {
+ frh->tos = inet_dscp_to_dsfield(rule4->dscp);
+ }
if ((rule4->dst_len &&
nla_put_in_addr(skb, FRA_DST, rule4->dst)) ||
@@ -366,7 +411,8 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(4) /* dst */
+ nla_total_size(4) /* src */
- + nla_total_size(4); /* flow */
+ + nla_total_size(4) /* flow */
+ + nla_total_size(1); /* dscp */
}
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
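The rule-match logic above keeps two behaviours behind one dscp field: a rule installed via the new FRA_DSCP attribute must match all six DSCP bits, while a legacy TOS rule masks out the upper three DSCP bits first, as the in-code comment explains. A standalone sketch of the two comparisons; INET_DSCP_MASK is written out, and the masked match is modelled directly on dsfield bytes (the kernel's fib_dscp_masked_match() compares through dscp_t and struct flowi4, so this is a simplification):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define INET_DSCP_MASK	0xfc	/* DSCP: top six bits of the dsfield */
#define LEGACY_TOS_MASK	0x1c	/* old IPTOS_RT_MASK: low three DSCP bits */

struct rule {
	uint8_t dscp;		/* stored as a dsfield value (DSCP << 2) */
	bool dscp_full;		/* set when installed via FRA_DSCP */
};

static bool rule_match(const struct rule *r, uint8_t pkt_dsfield)
{
	if (r->dscp_full)	/* DSCP selector: all six bits must match */
		return r->dscp == (pkt_dsfield & INET_DSCP_MASK);
	if (!r->dscp)		/* TOS selector, wildcard */
		return true;
	/* TOS selector: ignore the upper three DSCP bits, as RT_TOS did */
	return r->dscp == (pkt_dsfield & LEGACY_TOS_MASK);
}

int main(void)
{
	struct rule tos_rule  = { .dscp = 0x10, .dscp_full = false };
	struct rule dscp_rule = { .dscp = 0x90, .dscp_full = true };

	/* 0x90 and 0x10 differ only in the upper three DSCP bits */
	printf("tos rule vs 0x90:  %d\n", rule_match(&tos_rule, 0x90));  /* 1 */
	printf("dscp rule vs 0x10: %d\n", rule_match(&dscp_rule, 0x10)); /* 0 */
	return 0;
}

The same split explains fib4_rule_fill(): a full-DSCP rule is dumped with frh->tos zeroed and an FRA_DSCP attribute instead, so old and new userspace each see the selector they understand.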
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57cd2b96e2..ba2df3d2ac15 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -543,8 +543,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
info->nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}
static int fib_detect_death(struct fib_info *fi, int order,
@@ -2066,8 +2065,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_dscp &&
- fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
if (fa->tb_id != tb->tb_id)
continue;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8f30e3f00b7f..09e31757e96c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1580,8 +1580,7 @@ found:
if (index >= (1ul << fa->fa_slen))
continue;
}
- if (fa->fa_dscp &&
- inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
/* Paired with WRITE_ONCE() in fib_release_info() */
if (READ_ONCE(fi->fib_dead))
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ab6d0d98dbc3..e1384e7331d8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -93,6 +93,7 @@
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <net/addrconf.h>
+#include <net/inet_dscp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/icmp.h>
@@ -220,61 +221,56 @@ static inline void icmp_xmit_unlock(struct sock *sk)
spin_unlock(&sk->sk_lock.slock);
}
-int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
-int sysctl_icmp_msgs_burst __read_mostly = 50;
-
-static struct {
- spinlock_t lock;
- u32 credit;
- u32 stamp;
-} icmp_global = {
- .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock),
-};
-
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
+ * @net: network namespace
*
* Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
- * Note: called with BH disabled
+ * Works in tandem with icmp_global_consume().
*/
-bool icmp_global_allow(void)
+bool icmp_global_allow(struct net *net)
{
- u32 credit, delta, incr = 0, now = (u32)jiffies;
- bool rc = false;
+ u32 delta, now, oldstamp;
+ int incr, new, old;
- /* Check if token bucket is empty and cannot be refilled
- * without taking the spinlock. The READ_ONCE() are paired
- * with the following WRITE_ONCE() in this same function.
+ /* Note: many cpus could find this condition true.
+	 * Then later icmp_global_consume() could consume more credits;
+	 * this is an acceptable race.
*/
- if (!READ_ONCE(icmp_global.credit)) {
- delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ);
- if (delta < HZ / 50)
- return false;
- }
+ if (atomic_read(&net->ipv4.icmp_global_credit) > 0)
+ return true;
- spin_lock(&icmp_global.lock);
- delta = min_t(u32, now - icmp_global.stamp, HZ);
- if (delta >= HZ / 50) {
- incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
- if (incr)
- WRITE_ONCE(icmp_global.stamp, now);
- }
- credit = min_t(u32, icmp_global.credit + incr,
- READ_ONCE(sysctl_icmp_msgs_burst));
- if (credit) {
- /* We want to use a credit of one in average, but need to randomize
- * it for security reasons.
- */
- credit = max_t(int, credit - get_random_u32_below(3), 0);
- rc = true;
+ now = jiffies;
+ oldstamp = READ_ONCE(net->ipv4.icmp_global_stamp);
+ delta = min_t(u32, now - oldstamp, HZ);
+ if (delta < HZ / 50)
+ return false;
+
+ incr = READ_ONCE(net->ipv4.sysctl_icmp_msgs_per_sec) * delta / HZ;
+ if (!incr)
+ return false;
+
+ if (cmpxchg(&net->ipv4.icmp_global_stamp, oldstamp, now) == oldstamp) {
+ old = atomic_read(&net->ipv4.icmp_global_credit);
+ do {
+ new = min(old + incr, READ_ONCE(net->ipv4.sysctl_icmp_msgs_burst));
+ } while (!atomic_try_cmpxchg(&net->ipv4.icmp_global_credit, &old, new));
}
- WRITE_ONCE(icmp_global.credit, credit);
- spin_unlock(&icmp_global.lock);
- return rc;
+ return true;
}
EXPORT_SYMBOL(icmp_global_allow);
+void icmp_global_consume(struct net *net)
+{
+ int credits = get_random_u32_below(3);
+
+	/* Note: this might make icmp_global_credit negative. */
+ if (credits)
+ atomic_sub(credits, &net->ipv4.icmp_global_credit);
+}
+EXPORT_SYMBOL(icmp_global_consume);
+
static bool icmpv4_mask_allow(struct net *net, int type, int code)
{
if (type > NR_ICMP_TYPES)
@@ -291,14 +287,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return false;
}
-static bool icmpv4_global_allow(struct net *net, int type, int code)
+static bool icmpv4_global_allow(struct net *net, int type, int code,
+ bool *apply_ratelimit)
{
if (icmpv4_mask_allow(net, type, code))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -308,15 +306,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
*/
static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
- struct flowi4 *fl4, int type, int code)
+ struct flowi4 *fl4, int type, int code,
+ bool apply_ratelimit)
{
struct dst_entry *dst = &rt->dst;
struct inet_peer *peer;
bool rc = true;
int vif;
- if (icmpv4_mask_allow(net, type, code))
- goto out;
+ if (!apply_ratelimit)
+ return true;
/* No rate limit on loopback */
if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
@@ -331,6 +330,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
out:
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
return rc;
}
@@ -402,6 +403,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
struct ipcm_cookie ipc;
struct rtable *rt = skb_rtable(skb);
struct net *net = dev_net(rt->dst.dev);
+ bool apply_ratelimit = false;
struct flowi4 fl4;
struct sock *sk;
struct inet_sock *inet;
@@ -413,11 +415,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb))
return;
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
- /* global icmp_msgs_per_sec */
- if (!icmpv4_global_allow(net, type, code))
+ /* is global icmp_msgs_per_sec exhausted ? */
+ if (!icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -443,14 +445,14 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.saddr = saddr;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = sock_net_uid(net, NULL);
- fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
+ fl4.flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK;
fl4.flowi4_proto = IPPROTO_ICMP;
fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
goto out_unlock;
- if (icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt);
ip_rt_put(rt);
out_unlock:
@@ -496,7 +498,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->saddr = saddr;
fl4->flowi4_mark = mark;
fl4->flowi4_uid = sock_net_uid(net, NULL);
- fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
@@ -545,7 +547,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
orefdst = skb_in->_skb_refdst; /* save old refdst */
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
- RT_TOS(tos), rt2->dst.dev);
+ tos, rt2->dst.dev);
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
@@ -596,6 +598,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
int room;
struct icmp_bxm icmp_param;
struct rtable *rt = skb_rtable(skb_in);
+ bool apply_ratelimit = false;
struct ipcm_cookie ipc;
struct flowi4 fl4;
__be32 saddr;
@@ -677,7 +680,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
}
}
- /* Needed by both icmp_global_allow and icmp_xmit_lock */
+ /* Needed by both icmpv4_global_allow and icmp_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit, unless
@@ -685,7 +688,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
* loopback, then peer ratelimit still work (in icmpv4_xrlim_allow)
*/
if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) &&
- !icmpv4_global_allow(net, type, code))
+ !icmpv4_global_allow(net, type, code, &apply_ratelimit))
goto out_bh_enable;
sk = icmp_xmit_lock(net);
@@ -744,7 +747,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
goto out_unlock;
/* peer icmp_ratelimit */
- if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code))
+ if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit))
goto ende;
/* RFC says return as much as we can without exceeding 576 bytes. */
@@ -1487,6 +1490,8 @@ static int __net_init icmp_sk_init(struct net *net)
net->ipv4.sysctl_icmp_ratelimit = 1 * HZ;
net->ipv4.sysctl_icmp_ratemask = 0x1818;
net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0;
+ net->ipv4.sysctl_icmp_msgs_per_sec = 1000;
+ net->ipv4.sysctl_icmp_msgs_burst = 50;
return 0;
}
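The icmp.c rewrite above turns the global, spinlock-protected token bucket into a lockless per-netns allow/consume pair: icmp_global_allow() refills a shared credit counter at most once per refill interval, using cmpxchg() on the timestamp so only the CPU that advances the stamp performs the refill, and icmp_global_consume() later subtracts a randomized 0-2 credits, deliberately allowing the counter to dip negative. A userspace model of the same scheme using C11 atomics; HZ and the rate/burst constants below are made-up stand-ins for the (now per-netns) sysctls:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define HZ		100
#define MSGS_PER_SEC	1000
#define MSGS_BURST	50

static atomic_int credit;
static atomic_uint stamp;	/* time of last refill, in ticks */

static int min_int(int a, int b) { return a < b ? a : b; }

static bool icmp_global_allow(unsigned int now)
{
	/* Benign race: several CPUs may all see a positive credit. */
	if (atomic_load(&credit) > 0)
		return true;

	unsigned int oldstamp = atomic_load(&stamp);
	unsigned int delta = now - oldstamp;

	if (delta > HZ)
		delta = HZ;
	if (delta < HZ / 50)
		return false;		/* refilled too recently */

	int incr = MSGS_PER_SEC * delta / HZ;

	if (!incr)
		return false;

	/* Only the CPU that wins the stamp update performs the refill. */
	if (atomic_compare_exchange_strong(&stamp, &oldstamp, now)) {
		int old = atomic_load(&credit);
		int new;

		do {
			new = min_int(old + incr, MSGS_BURST);
		} while (!atomic_compare_exchange_weak(&credit, &old, new));
	}
	return true;
}

static void icmp_global_consume(void)
{
	int credits = rand() % 3;	/* randomized for security: 0..2 */

	if (credits)
		atomic_fetch_sub(&credit, credits);	/* may go negative */
}

int main(void)
{
	int sent = 0;

	for (unsigned int now = 0; now < 3 * HZ; now++)
		if (icmp_global_allow(now)) {
			icmp_global_consume();
			sent++;
		}
	printf("sent %d replies over 3 simulated seconds\n", sent);
	return 0;
}

Splitting allow from consume also lets the callers charge credits only when a reply is actually rate-limited, which is why icmpv4_xrlim_allow() now takes the apply_ratelimit flag.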
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 64d07b842e73..ce4d77f49243 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -236,7 +236,7 @@ static bool inet_bhash2_conflict(const struct sock *sk,
#define sk_for_each_bound_bhash(__sk, __tb2, __tb) \
hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node) \
- sk_for_each_bound(sk2, &(__tb2)->owners)
+ sk_for_each_bound((__sk), &(__tb2)->owners)
/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
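The one-token fix above (sk2 becomes (__sk)) is a classic macro-hygiene bug: the iterator expanded a hard-coded identifier from its original call site instead of its parameter, so any caller that did not happen to name its socket variable sk2 would silently bind the wrong variable, or fail to build. A reduced illustration of the failure class; the macros below are deliberately toy examples:

#include <stdio.h>

/* Buggy: ignores its parameter and captures whatever `x` is in scope. */
#define SQUARE_BUGGY(__v)	(x * x)
/* Fixed: uses the parameter, properly parenthesized. */
#define SQUARE_FIXED(__v)	((__v) * (__v))

int main(void)
{
	int x = 2, y = 5;

	printf("buggy SQUARE(y) = %d\n", SQUARE_BUGGY(y));	/* 4, not 25 */
	printf("fixed SQUARE(y) = %d\n", SQUARE_FIXED(y));	/* 25 */
	return 0;
}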
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9712cdb8087c..67639309163d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -442,7 +442,7 @@ static int inet_twsk_diag_fill(struct sock *sk,
inet_diag_msg_common_fill(r, sk);
r->idiag_retrans = 0;
- r->idiag_state = tw->tw_substate;
+ r->idiag_state = READ_ONCE(tw->tw_substate);
r->idiag_timer = 3;
tmo = tw->tw_timer.expires - jiffies;
r->idiag_expires = jiffies_delta_to_msecs(tmo);
@@ -1209,7 +1209,7 @@ next_chunk:
if (num < s_num)
goto next_normal;
state = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_substate : sk->sk_state;
+ READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
if (!(idiag_states & (1 << state)))
goto next_normal;
if (r->sdiag_family != AF_UNSPEC &&
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 48d0d494185b..9bfcfd016e18 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -310,7 +310,7 @@ inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
return inet_lhash2_bucket(h, hash);
}
-static inline int compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, const struct net *net,
const unsigned short hnum, const __be32 daddr,
const int dif, const int sdif)
{
@@ -348,7 +348,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
* Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
* the selected sock or an error.
*/
-struct sock *inet_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
@@ -374,7 +374,7 @@ EXPORT_SYMBOL_GPL(inet_lookup_reuseport);
*/
/* called with rcu_read_lock() : No refcount taken on the socket */
-static struct sock *inet_lhash2_lookup(struct net *net,
+static struct sock *inet_lhash2_lookup(const struct net *net,
struct inet_listen_hashbucket *ilb2,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
@@ -401,7 +401,7 @@ static struct sock *inet_lhash2_lookup(struct net *net,
return result;
}
-struct sock *inet_lookup_run_sk_lookup(struct net *net,
+struct sock *inet_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
__be32 saddr, __be16 sport,
@@ -423,7 +423,7 @@ struct sock *inet_lookup_run_sk_lookup(struct net *net,
return sk;
}
-struct sock *__inet_lookup_listener(struct net *net,
+struct sock *__inet_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const __be32 saddr, __be16 sport,
@@ -488,7 +488,7 @@ void sock_edemux(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_edemux);
-struct sock *__inet_lookup_established(struct net *net,
+struct sock *__inet_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
const __be32 daddr, const u16 hnum,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ba205473522e..5f6fd382af38 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
#include <net/gre.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
+#include <net/inet_dscp.h>
/*
Problems & solutions
@@ -930,7 +931,7 @@ static int ipgre_open(struct net_device *dev)
t->parms.iph.daddr,
t->parms.iph.saddr,
t->parms.o_key,
- RT_TOS(t->parms.iph.tos),
+ t->parms.iph.tos & INET_DSCP_MASK,
t->parms.link);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
@@ -996,7 +997,7 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
- dev->features |= GRE_FEATURES | NETIF_F_LLTX;
+ dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
/* TCP offload with GRE SEQ is not supported, nor can we support 2
@@ -1010,6 +1011,8 @@ static void __gre_tunnel_init(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ipgre_tunnel_init(struct net_device *dev)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d6fbcbd2358a..b6e7d4921309 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -596,9 +596,8 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct dst_entry *dst;
@@ -646,9 +645,8 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
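The ip_input.c hunks above are a pure cleanup: the LIST_HEAD() macro declares and initializes a list head in one statement, removing the separate INIT_LIST_HEAD() call. A minimal restatement of the two forms; the macros below mirror the ones in <linux/list.h>:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name)	{ &(name), &(name) }
#define LIST_HEAD(name)		struct list_head name = LIST_HEAD_INIT(name)

static void INIT_LIST_HEAD(struct list_head *list)
{
	list->next = list;
	list->prev = list;
}

int main(void)
{
	/* Old style: declaration, then a runtime initialization call. */
	struct list_head sublist_old;
	INIT_LIST_HEAD(&sublist_old);

	/* New style: one statement, initialized at declaration. */
	LIST_HEAD(sublist_new);

	printf("old empty: %d, new empty: %d\n",
	       sublist_old.next == &sublist_old,
	       sublist_new.next == &sublist_new);
	return 0;
}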
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b90d0f78ac80..49811c9281d4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -77,6 +77,7 @@
#include <net/inetpeer.h>
#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
+#include <net/inet_dscp.h>
#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
@@ -493,7 +494,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_TOS(tos),
+ tos & INET_DSCP_MASK,
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
@@ -1621,7 +1622,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark) ?: sk->sk_mark,
- RT_TOS(arg->tos),
+ arg->tos & INET_DSCP_MASK,
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
ip_reply_arg_flowi_flags(arg),
daddr, saddr,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5cffad42fe8c..d591c73e2c0e 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -43,6 +43,7 @@
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -293,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), dev_net(dev),
+ iph->tos & INET_DSCP_MASK, dev_net(dev),
tunnel->parms.link, tunnel->fwmark, 0, 0);
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -609,9 +610,9 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
- tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
- dev_net(dev), 0, skb->mark, skb_get_hash(skb),
- key->flow_flags);
+ tunnel_id_to_key32(key->tun_id),
+ tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark,
+ skb_get_hash(skb), key->flow_flags);
if (!tunnel_hlen)
tunnel_hlen = ip_encap_hlen(&tun_info->encap);
@@ -772,7 +773,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos),
+ tunnel->parms.o_key, tos & INET_DSCP_MASK,
dev_net(dev), READ_ONCE(tunnel->parms.link),
tunnel->fwmark, skb_get_hash(skb), 0);
@@ -1161,7 +1162,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
* Allowing to move it to another netns is clearly unsafe.
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
- itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->netns_local = true;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
itn->type = itn->fb_tunnel_dev->type;
@@ -1326,7 +1327,7 @@ int ip_tunnel_init(struct net_device *dev)
tunnel->dev = dev;
tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
+ strscpy(tunnel->parms.name, dev->name);
iph->version = 4;
iph->ihl = 5;
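The strcpy() to strscpy() conversions in this series (here and in the x_tables get_info() paths) rely on the destination being a fixed-size array: the two-argument strscpy() infers the bound from sizeof the destination, always NUL-terminates, and reports truncation instead of silently overflowing. A userspace approximation of its contract, written with an explicit size parameter (the kernel also has this three-argument form; the real implementation lives in lib/string.c):

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Simplified model of strscpy(): bounded copy, always NUL-terminated,
 * returns the copied length or -E2BIG on truncation.
 */
static long strscpy_model(char *dst, const char *src, size_t size)
{
	size_t len = strnlen(src, size);

	if (!size)
		return -E2BIG;
	if (len == size) {		/* src does not fit */
		len = size - 1;
		memcpy(dst, src, len);
		dst[len] = '\0';
		return -E2BIG;
	}
	memcpy(dst, src, len + 1);
	return (long)len;
}

int main(void)
{
	char name[8];

	printf("%ld '%s'\n", strscpy_model(name, "gre0", sizeof(name)), name);
	printf("%ld '%s'\n",
	       strscpy_model(name, "a-very-long-name", sizeof(name)), name);
	return 0;
}

Unlike strcpy(), the worst case here is a truncated-but-terminated string and an error return, never a write past the end of parms.name or info.name.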
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 14536da9f5dc..f0b4419cef34 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -443,7 +443,7 @@ static int vti_tunnel_init(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
netif_keep_dst(dev);
return ip_tunnel_init(dev);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 923a2ef68c2f..dc0db5895e0e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -378,7 +378,7 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL;
dev->flags = IFF_NOARP;
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
netif_keep_dst(dev);
dev->features |= IPIP_FEATURES;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 6c750bd13dd8..089864c6a35e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/rtnh.h>
+#include <net/inet_dscp.h>
#include <linux/nospec.h>
@@ -536,7 +537,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
}
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -1868,7 +1869,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
vif->remote, vif->local,
0, 0,
IPPROTO_IPIP,
- RT_TOS(iph->tos), vif->link);
+ iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
goto out_free;
encap = sizeof(struct iphdr);
@@ -1876,7 +1877,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
0, 0,
IPPROTO_IPIP,
- RT_TOS(iph->tos), vif->link);
+ iph->tos & INET_DSCP_MASK, vif->link);
if (IS_ERR(rt))
goto out_free;
}
@@ -2080,7 +2081,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = RT_TOS(iph->tos),
+ .flowi4_tos = iph->tos & INET_DSCP_MASK,
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
@@ -2406,8 +2407,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
errout:
kfree_skb(skb);
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
}
static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 591a2737808e..e0aab66cd925 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -14,6 +14,7 @@
#include <net/route.h>
#include <net/xfrm.h>
#include <net/ip.h>
+#include <net/inet_dscp.h>
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
@@ -43,7 +44,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
*/
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 14365b20f1c5..1cdd9c28ab2d 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -826,7 +826,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
sizeof(info.underflow));
info.num_entries = private->number;
info.size = private->size;
- strcpy(info.name, name);
+ strscpy(info.name, name);
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
@@ -1547,7 +1547,7 @@ int arpt_register_table(struct net *net,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fe89a056eb06..3d101613f27f 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -981,7 +981,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
sizeof(info.underflow));
info.num_entries = private->number;
info.size = private->size;
- strcpy(info.name, name);
+ strscpy(info.name, name);
if (copy_to_user(user, &info, *len) != 0)
ret = -EFAULT;
@@ -1767,7 +1767,7 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index ded5bef02f77..1ce7a1655b97 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <net/inet_dscp.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/ip_fib.h>
@@ -75,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
+ flow.flowi4_tos = iph->tos & INET_DSCP_MASK;
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par));
flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 6cc5743c553a..f4aed0789d69 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -15,6 +15,7 @@
#include <net/icmp.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/inet_dscp.h>
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack.h>
@@ -32,7 +33,7 @@ static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
fl4.flowi4_oif = oif;
fl4.daddr = gw->s_addr;
- fl4.flowi4_tos = RT_TOS(iph->tos);
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index a522c3a3be52..ef5dd88107dd 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -40,13 +40,13 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
sizeof(struct in_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV])
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV],
&priv->sreg_dev, sizeof(int));
return err;
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 9eee535c64dd..00da1332bbf1 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -10,6 +10,7 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>
+#include <net/inet_dscp.h>
#include <net/ip_fib.h>
#include <net/route.h>
@@ -22,8 +23,6 @@ static __be32 get_saddr(__be32 addr)
return addr;
}
-#define DSCP_BITS 0xfc
-
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
@@ -110,7 +109,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
- fl4.flowi4_tos = iph->tos & DSCP_BITS;
+ fl4.flowi4_tos = iph->tos & INET_DSCP_MASK;
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 6b9787ee8601..93aaea0006ba 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -865,15 +865,18 @@ out:
}
static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
- u32 op_flags)
+ u32 op_flags, u32 *resp_op_flags)
{
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
struct nexthop_grp *p;
size_t len = nhg->num_nh * sizeof(*p);
struct nlattr *nla;
u16 group_type = 0;
+ u16 weight;
int i;
+ *resp_op_flags |= NHA_OP_FLAG_RESP_GRP_RESVD_0;
+
if (nhg->hash_threshold)
group_type = NEXTHOP_GRP_TYPE_MPATH;
else if (nhg->resilient)
@@ -888,9 +891,12 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh,
p = nla_data(nla);
for (i = 0; i < nhg->num_nh; ++i) {
+ weight = nhg->nh_entries[i].weight - 1;
+
*p++ = (struct nexthop_grp) {
.id = nhg->nh_entries[i].nh->id,
- .weight = nhg->nh_entries[i].weight - 1,
+ .weight = weight,
+ .weight_high = weight >> 8,
};
}
@@ -934,10 +940,12 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ u32 resp_op_flags = 0;
if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
goto nla_put_failure;
- if (nla_put_nh_group(skb, nh, op_flags))
+ if (nla_put_nh_group(skb, nh, op_flags, &resp_op_flags) ||
+ nla_put_u32(skb, NHA_OP_FLAGS, resp_op_flags))
goto nla_put_failure;
goto out;
}
@@ -1050,7 +1058,9 @@ static size_t nh_nlmsg_size(struct nexthop *nh)
sz += nla_total_size(4); /* NHA_ID */
if (nh->is_group)
- sz += nh_nlmsg_size_grp(nh);
+ sz += nh_nlmsg_size_grp(nh) +
+ nla_total_size(4) + /* NHA_OP_FLAGS */
+ 0;
else
sz += nh_nlmsg_size_single(nh);
@@ -1080,8 +1090,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
}
static unsigned long nh_res_bucket_used_time(const struct nh_res_bucket *bucket)
@@ -1201,8 +1210,7 @@ static void nexthop_bucket_notify(struct nh_res_table *res_table,
rtnl_notify(skb, nh->net, 0, RTNLGRP_NEXTHOP, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
+ rtnl_set_sk_err(nh->net, RTNLGRP_NEXTHOP, err);
}
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
@@ -1280,11 +1288,14 @@ static int nh_check_attr_group(struct net *net,
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
- if (nhg[i].resvd1 || nhg[i].resvd2) {
- NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
+ if (nhg[i].resvd2) {
+ NL_SET_ERR_MSG(extack, "Reserved field in nexthop_grp must be 0");
return -EINVAL;
}
- if (nhg[i].weight > 254) {
+ if (nexthop_grp_weight(&nhg[i]) == 0) {
+ /* 0xffff got passed in, representing weight of 0x10000,
+ * which is too heavy.
+ */
NL_SET_ERR_MSG(extack, "Invalid value for weight");
return -EINVAL;
}
@@ -1880,9 +1891,9 @@ static void nh_res_table_cancel_upkeep(struct nh_res_table *res_table)
static void nh_res_group_rebalance(struct nh_group *nhg,
struct nh_res_table *res_table)
{
- int prev_upper_bound = 0;
- int total = 0;
- int w = 0;
+ u16 prev_upper_bound = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
INIT_LIST_HEAD(&res_table->uw_nh_entries);
@@ -1892,11 +1903,12 @@ static void nh_res_group_rebalance(struct nh_group *nhg,
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u16 upper_bound;
+ u64 btw;
w += nhge->weight;
- upper_bound = DIV_ROUND_CLOSEST(res_table->num_nh_buckets * w,
- total);
+ btw = ((u64)res_table->num_nh_buckets) * w;
+ upper_bound = DIV_ROUND_CLOSEST_ULL(btw, total);
nhge->res.wants_buckets = upper_bound - prev_upper_bound;
prev_upper_bound = upper_bound;
@@ -1962,8 +1974,8 @@ static void replace_nexthop_grp_res(struct nh_group *oldg,
static void nh_hthr_group_rebalance(struct nh_group *nhg)
{
- int total = 0;
- int w = 0;
+ u32 total = 0;
+ u32 w = 0;
int i;
for (i = 0; i < nhg->num_nh; ++i)
@@ -1971,7 +1983,7 @@ static void nh_hthr_group_rebalance(struct nh_group *nhg)
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
- int upper_bound;
+ u32 upper_bound;
w += nhge->weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
@@ -2713,7 +2725,8 @@ static struct nexthop *nexthop_create_group(struct net *net,
goto out_no_nh;
}
nhg->nh_entries[i].nh = nhe;
- nhg->nh_entries[i].weight = entry[i].weight + 1;
+ nhg->nh_entries[i].weight = nexthop_grp_weight(&entry[i]);
+
list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
nhg->nh_entries[i].nh_parent = nh;
}
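The nexthop changes above widen group weights from 8 to 16 bits without breaking the uAPI: userspace still puts weight-1 in the legacy 8-bit .weight field, the high byte travels in what used to be resvd1 (now .weight_high, which is why only resvd2 is still checked for zero), and nexthop_grp_weight() reassembles the two halves and adds the 1 back, so an encoded 0xffff decodes to 0x10000, wraps to 0, and is rejected as too heavy. A sketch of the encode/decode pair; the struct layout is abbreviated and encode() is a hypothetical helper for illustration:

#include <stdint.h>
#include <stdio.h>

struct nexthop_grp {
	uint32_t id;
	uint8_t  weight;	/* low 8 bits of (weight - 1) */
	uint8_t  weight_high;	/* high 8 bits of (weight - 1); was resvd1 */
	uint16_t resvd2;	/* must still be zero */
};

/* Mirrors the decode in the diff: a result of 0 means "was 0x10000". */
static uint16_t nexthop_grp_weight_model(const struct nexthop_grp *e)
{
	return (uint16_t)((((uint16_t)e->weight_high << 8) | e->weight) + 1);
}

static void encode(struct nexthop_grp *e, uint32_t weight)
{
	uint16_t w = (uint16_t)(weight - 1);

	e->weight = w & 0xff;
	e->weight_high = w >> 8;
}

int main(void)
{
	struct nexthop_grp e = { .id = 1 };

	encode(&e, 256);	/* needs the high byte */
	printf("decoded: %u\n", nexthop_grp_weight_model(&e));	/* 256 */

	encode(&e, 0x10000);	/* encodes as 0xffff... */
	printf("decoded: %u (0 means rejected)\n", nexthop_grp_weight_model(&e));
	return 0;
}

The widened weights are also why the rebalance paths above move their accumulators from int to u32 and compute bucket bounds through 64-bit intermediates.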
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 13c0f1d455f3..723ac9181558 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -512,7 +512,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
sk->sk_protocol;
}
- flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
+ flowi4_init_output(fl4, oif, mark, tos & INET_DSCP_MASK, scope,
prot, flow_flags, iph->daddr, iph->saddr, 0, 0,
sock_net_uid(net, sk));
}
@@ -541,7 +541,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
- ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
+ ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk),
inet_test_bit(HDRINCL, sk) ?
IPPROTO_RAW : sk->sk_protocol,
@@ -1263,7 +1263,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = iph->tos & IPTOS_RT_MASK,
+ .flowi4_tos = iph->tos & INET_DSCP_MASK,
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -2160,7 +2160,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- tos &= IPTOS_RT_MASK;
+ tos &= INET_DSCP_MASK;
err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
if (err < 0)
goto martian_source;
@@ -2470,7 +2470,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct fib_result res;
int err;
- tos &= IPTOS_RT_MASK;
+ tos &= INET_DSCP_MASK;
rcu_read_lock();
err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
rcu_read_unlock();
@@ -2618,7 +2618,7 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
struct rtable *rth;
fl4->flowi4_iif = LOOPBACK_IFINDEX;
- fl4->flowi4_tos &= IPTOS_RT_MASK;
+ fl4->flowi4_tos &= INET_DSCP_MASK;
rcu_read_lock();
rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
@@ -3261,7 +3261,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst;
fl4.saddr = src;
- fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
+ fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
@@ -3286,7 +3286,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->dev = dev;
skb->mark = mark;
err = ip_route_input_rcu(skb, dst, src,
- rtm->rtm_tos & IPTOS_RT_MASK, dev,
+ rtm->rtm_tos & INET_DSCP_MASK, dev,
&res);
rt = skb_rtable(skb);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 4af0c234d8d7..a79b2a52ce01 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -601,22 +601,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_available_ulp,
},
{
- .procname = "icmp_msgs_per_sec",
- .data = &sysctl_icmp_msgs_per_sec,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "icmp_msgs_burst",
- .data = &sysctl_icmp_msgs_burst,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
@@ -702,6 +686,22 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_msgs_per_sec",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_per_sec,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
+ .procname = "icmp_msgs_burst",
+ .data = &init_net.ipv4.sysctl_icmp_msgs_burst,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
+ {
.procname = "ping_group_range",
.data = &init_net.ipv4.ping_group_range.range,
.maxlen = sizeof(gid_t)*2,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 831a18dc7aa6..4f77bd862e95 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -285,6 +285,8 @@
#include <trace/events/tcp.h>
#include <net/rps.h>
+#include "../core/devmem.h"
+
/* Track pending CMSGs. */
enum {
TCP_CMSG_INQ = 1,
@@ -471,6 +473,7 @@ void tcp_init_sock(struct sock *sk)
set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
sk_sockets_allocated_inc(sk);
+ xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1);
}
EXPORT_SYMBOL(tcp_init_sock);
@@ -2160,6 +2163,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
skb = tcp_recv_skb(sk, seq, &offset);
}
+ if (!skb_frags_readable(skb))
+ break;
+
if (TCP_SKB_CB(skb)->has_rxtstamp) {
tcp_update_recv_tstamps(skb, tss);
zc->msg_flags |= TCP_CMSG_TS;
@@ -2177,6 +2183,9 @@ static int tcp_zerocopy_receive(struct sock *sk,
break;
}
page = skb_frag_page(frags);
+ if (WARN_ON_ONCE(!page))
+ break;
+
prefetchw(page);
pages[pages_to_map++] = page;
length += PAGE_SIZE;
@@ -2235,6 +2244,7 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
struct scm_timestamping_internal *tss)
{
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
+ u32 tsflags = READ_ONCE(sk->sk_tsflags);
bool has_timestamping = false;
if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
@@ -2274,14 +2284,18 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
- if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
+ if (tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
@@ -2317,6 +2331,220 @@ static int tcp_inq_hint(struct sock *sk)
return inq;
}
+/* batch __xa_alloc() calls and reduce xa_lock()/xa_unlock() overhead. */
+struct tcp_xa_pool {
+ u8 max; /* max <= MAX_SKB_FRAGS */
+ u8 idx; /* idx <= max */
+ __u32 tokens[MAX_SKB_FRAGS];
+ netmem_ref netmems[MAX_SKB_FRAGS];
+};
+
+static void tcp_xa_pool_commit_locked(struct sock *sk, struct tcp_xa_pool *p)
+{
+ int i;
+
+ /* Commit part that has been copied to user space. */
+ for (i = 0; i < p->idx; i++)
+ __xa_cmpxchg(&sk->sk_user_frags, p->tokens[i], XA_ZERO_ENTRY,
+ (__force void *)p->netmems[i], GFP_KERNEL);
+ /* Rollback what has been pre-allocated and is no longer needed. */
+ for (; i < p->max; i++)
+ __xa_erase(&sk->sk_user_frags, p->tokens[i]);
+
+ p->max = 0;
+ p->idx = 0;
+}
+
+static void tcp_xa_pool_commit(struct sock *sk, struct tcp_xa_pool *p)
+{
+ if (!p->max)
+ return;
+
+ xa_lock_bh(&sk->sk_user_frags);
+
+ tcp_xa_pool_commit_locked(sk, p);
+
+ xa_unlock_bh(&sk->sk_user_frags);
+}
+
+static int tcp_xa_pool_refill(struct sock *sk, struct tcp_xa_pool *p,
+ unsigned int max_frags)
+{
+ int err, k;
+
+ if (p->idx < p->max)
+ return 0;
+
+ xa_lock_bh(&sk->sk_user_frags);
+
+ tcp_xa_pool_commit_locked(sk, p);
+
+ for (k = 0; k < max_frags; k++) {
+ err = __xa_alloc(&sk->sk_user_frags, &p->tokens[k],
+ XA_ZERO_ENTRY, xa_limit_31b, GFP_KERNEL);
+ if (err)
+ break;
+ }
+
+ xa_unlock_bh(&sk->sk_user_frags);
+
+ p->max = k;
+ p->idx = 0;
+ return k ? 0 : err;
+}
+
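The pool above batches token allocation so the xarray lock is taken once per batch of up to MAX_SKB_FRAGS tokens rather than once per fragment: refill pre-allocates slots as XA_ZERO_ENTRY placeholders, and commit exchanges the consumed placeholders for their netmems while erasing whatever was pre-allocated but never used. A sketch of the intended calling pattern, inferred from the helpers above (not a verbatim caller):

	struct tcp_xa_pool pool = { .max = 0, .idx = 0 };
	int err;

	err = tcp_xa_pool_refill(sk, &pool, frags_left);	/* locks once */
	if (!err) {
		/* hand pool.tokens[pool.idx] to user space, then: */
		pool.netmems[pool.idx++] = netmem;	/* mark consumed */
	}
	tcp_xa_pool_commit(sk, &pool);	/* commit used, roll back unused */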
+/* On error, returns the -errno. On success, returns number of bytes sent to the
+ * user. May not consume all of @remaining_len.
+ */
+static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
+ unsigned int offset, struct msghdr *msg,
+ int remaining_len)
+{
+ struct dmabuf_cmsg dmabuf_cmsg = { 0 };
+ struct tcp_xa_pool tcp_xa_pool;
+ unsigned int start;
+ int i, copy, n;
+ int sent = 0;
+ int err = 0;
+
+ tcp_xa_pool.max = 0;
+ tcp_xa_pool.idx = 0;
+ do {
+ start = skb_headlen(skb);
+
+ if (skb_frags_readable(skb)) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* Copy header. */
+ copy = start - offset;
+ if (copy > 0) {
+ copy = min(copy, remaining_len);
+
+ n = copy_to_iter(skb->data + offset, copy,
+ &msg->msg_iter);
+ if (n != copy) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ offset += copy;
+ remaining_len -= copy;
+
+ /* First a dmabuf_cmsg for # bytes copied to user
+ * buffer.
+ */
+ memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg));
+ dmabuf_cmsg.frag_size = copy;
+ err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR,
+ sizeof(dmabuf_cmsg), &dmabuf_cmsg);
+ if (err || msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~MSG_CTRUNC;
+ if (!err)
+ err = -ETOOSMALL;
+ goto out;
+ }
+
+ sent += copy;
+
+ if (remaining_len == 0)
+ goto out;
+ }
+
+ /* after that, send information of dmabuf pages through a
+ * sequence of cmsg
+ */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct net_iov *niov;
+ u64 frag_offset;
+ int end;
+
+ /* !skb_frags_readable() should indicate that ALL the
+ * frags in this skb are dmabuf net_iovs. We're checking
+ * for that flag above, but also check individual frags
+ * here. If the tcp stack is not setting
+ * skb_frags_readable() correctly, we still don't want
+ * to crash here.
+ */
+ if (!skb_frag_net_iov(frag)) {
+ net_err_ratelimited("Found non-dmabuf skb with net_iov");
+ err = -ENODEV;
+ goto out;
+ }
+
+ niov = skb_frag_net_iov(frag);
+ end = start + skb_frag_size(frag);
+ copy = end - offset;
+
+ if (copy > 0) {
+ copy = min(copy, remaining_len);
+
+ frag_offset = net_iov_virtual_addr(niov) +
+ skb_frag_off(frag) + offset -
+ start;
+ dmabuf_cmsg.frag_offset = frag_offset;
+ dmabuf_cmsg.frag_size = copy;
+ err = tcp_xa_pool_refill(sk, &tcp_xa_pool,
+ skb_shinfo(skb)->nr_frags - i);
+ if (err)
+ goto out;
+
+ /* Will perform the exchange later */
+ dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx];
+ dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov);
+
+ offset += copy;
+ remaining_len -= copy;
+
+ err = put_cmsg(msg, SOL_SOCKET,
+ SO_DEVMEM_DMABUF,
+ sizeof(dmabuf_cmsg),
+ &dmabuf_cmsg);
+ if (err || msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~MSG_CTRUNC;
+ if (!err)
+ err = -ETOOSMALL;
+ goto out;
+ }
+
+ atomic_long_inc(&niov->pp_ref_count);
+ tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag);
+
+ sent += copy;
+
+ if (remaining_len == 0)
+ goto out;
+ }
+ start = end;
+ }
+
+ tcp_xa_pool_commit(sk, &tcp_xa_pool);
+ if (!remaining_len)
+ goto out;
+
+ /* if remaining_len is not satisfied yet, we need to go to the
+ * next frag in the frag_list to satisfy remaining_len.
+ */
+ skb = skb_shinfo(skb)->frag_list ?: skb->next;
+
+ offset = offset - start;
+ } while (skb);
+
+ if (remaining_len) {
+ err = -EFAULT;
+ goto out;
+ }
+
+out:
+ tcp_xa_pool_commit(sk, &tcp_xa_pool);
+ if (!sent)
+ sent = err;
+
+ return sent;
+}
+
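tcp_recvmsg_dmabuf() reports payload locations to user space as control messages: SO_DEVMEM_LINEAR for bytes copied out of the linear header, SO_DEVMEM_DMABUF for fragments left in device memory, each carrying a struct dmabuf_cmsg with the fragment's offset, size, token and dmabuf id. A hedged userspace sketch of the receive side; the flag value and struct layout are assumptions based on the uapi this series adds, so verify them against your kernel's headers:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/types.h>

#ifndef MSG_SOCK_DEVMEM
#define MSG_SOCK_DEVMEM		0x2000000	/* assumed value */
#endif
#ifndef SO_DEVMEM_LINEAR
#define SO_DEVMEM_LINEAR	78		/* assumed value */
#define SO_DEVMEM_DMABUF	79		/* assumed value */
#endif

struct dmabuf_cmsg {				/* assumed layout */
	__u64 frag_offset;
	__u32 frag_size;
	__u32 frag_token;
	__u32 dmabuf_id;
	__u32 flags;
};

static void devmem_recv_once(int fd, char *linear, size_t linear_len)
{
	char ctrl[CMSG_SPACE(sizeof(struct dmabuf_cmsg)) * 16];
	struct iovec iov = { .iov_base = linear, .iov_len = linear_len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cm;

	if (recvmsg(fd, &msg, MSG_SOCK_DEVMEM) < 0)
		return;

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		struct dmabuf_cmsg dc;

		memcpy(&dc, CMSG_DATA(cm), sizeof(dc));
		if (cm->cmsg_type == SO_DEVMEM_LINEAR)
			printf("linear: %u bytes\n", dc.frag_size);
		else if (cm->cmsg_type == SO_DEVMEM_DMABUF)
			printf("dmabuf %u: off %llu len %u token %u\n",
			       dc.dmabuf_id,
			       (unsigned long long)dc.frag_offset,
			       dc.frag_size, dc.frag_token);
	}
}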
/*
* This routine copies from a sock struct into the user buffer.
*
@@ -2330,6 +2558,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
+ int last_copied_dmabuf = -1; /* uninitialized */
int copied = 0;
u32 peek_seq;
u32 *seq;
@@ -2509,15 +2738,44 @@ found_ok_skb:
}
if (!(flags & MSG_TRUNC)) {
- err = skb_copy_datagram_msg(skb, offset, msg, used);
- if (err) {
- /* Exception. Bailout! */
- if (!copied)
- copied = -EFAULT;
+ if (last_copied_dmabuf != -1 &&
+ last_copied_dmabuf != !skb_frags_readable(skb))
break;
+
+ if (skb_frags_readable(skb)) {
+ err = skb_copy_datagram_msg(skb, offset, msg,
+ used);
+ if (err) {
+ /* Exception. Bailout! */
+ if (!copied)
+ copied = -EFAULT;
+ break;
+ }
+ } else {
+ if (!(flags & MSG_SOCK_DEVMEM)) {
+ /* dmabuf skbs can only be received
+ * with the MSG_SOCK_DEVMEM flag.
+ */
+ if (!copied)
+ copied = -EFAULT;
+
+ break;
+ }
+
+ err = tcp_recvmsg_dmabuf(sk, skb, offset, msg,
+ used);
+ if (err <= 0) {
+ if (!copied)
+ copied = -EFAULT;
+
+ break;
+ }
+ used = err;
}
}
+ last_copied_dmabuf = !skb_frags_readable(skb);
+
WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
@@ -2833,7 +3091,7 @@ void __tcp_close(struct sock *sk, long timeout)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, sk->sk_allocation,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_CLOSE);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
@@ -2908,7 +3166,7 @@ adjudge_to_death:
if (READ_ONCE(tp->linger2) < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_LINGER);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONLINGER);
} else {
@@ -2927,7 +3185,7 @@ adjudge_to_death:
if (tcp_check_oom(sk, 0)) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_MEMORY);
__NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPABORTONMEMORY);
} else if (!check_net(sock_net(sk))) {
@@ -3025,13 +3283,16 @@ int tcp_disconnect(struct sock *sk, int flags)
inet_csk_listen_stop(sk);
} else if (unlikely(tp->repair)) {
WRITE_ONCE(sk->sk_err, ECONNABORTED);
- } else if (tcp_need_reset(old_state) ||
- (tp->snd_nxt != tp->write_seq &&
- (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
+ } else if (tcp_need_reset(old_state)) {
+ tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_TCP_STATE);
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
+ } else if (tp->snd_nxt != tp->write_seq &&
+ (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
/* The last check adjusts for discrepancy of Linux wrt. RFC
* states
*/
- tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED);
+ tcp_send_active_reset(sk, gfp_any(),
+ SK_RST_REASON_TCP_DISCONNECT_WITH_DATA);
WRITE_ONCE(sk->sk_err, ECONNRESET);
} else if (old_state == TCP_SYN_SENT)
WRITE_ONCE(sk->sk_err, ECONNRESET);
@@ -4655,7 +4916,7 @@ int tcp_abort(struct sock *sk, int err)
if (tcp_need_reset(sk->sk_state))
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_STATE);
tcp_done_with_error(sk, err);
bh_unlock_sock(sk);
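Across these tcp.c hunks the catch-all SK_RST_REASON_NOT_SPECIFIED gives way to a per-call-site value, so the tcp_send_reset tracepoint can tell the abort paths apart. As a quick reference, a sketch of the subset of enum sk_rst_reason values used here; the comments are interpretations of the call sites above, not kernel documentation:

enum sk_rst_reason_subset {	/* illustrative subset, not the full enum */
	SK_RST_REASON_TCP_ABORT_ON_CLOSE,	/* close() with unread data */
	SK_RST_REASON_TCP_ABORT_ON_LINGER,	/* linger2 < 0 at close */
	SK_RST_REASON_TCP_ABORT_ON_MEMORY,	/* oom / out of resources */
	SK_RST_REASON_TCP_STATE,		/* state machine demands a reset */
	SK_RST_REASON_TCP_DISCONNECT_WITH_DATA,	/* disconnect with queued data */
	SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT,	/* keepalive probes exhausted */
};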
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index fe6178715ba0..e7658c5d6b79 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -30,7 +30,7 @@ void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
}
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
- struct sk_msg *msg, u32 apply_bytes, int flags)
+ struct sk_msg *msg, u32 apply_bytes)
{
bool apply = apply_bytes;
struct scatterlist *sge;
@@ -167,7 +167,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
if (unlikely(!psock))
return -EPIPE;
- ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+ ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
return ret;
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 52b1f2665dfa..81b96331b2bb 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -185,7 +185,7 @@ static inline void htcp_alpha_update(struct htcp *ca)
u32 scale = (HZ << 3) / (10 * minRTT);
/* clamping ratio to interval [0.5,10]<<3 */
- scale = min(max(scale, 1U << 2), 10U << 3);
+ scale = clamp(scale, 1U << 2, 10U << 3);
factor = (factor << 3) / scale;
if (!factor)
factor = 1;
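The htcp change is purely cosmetic: clamp(val, lo, hi) equals min(max(val, lo), hi) while spelling out the [lo, hi] interval directly. A self-contained check of the identity over the range used above:

#include <assert.h>

#define min(a, b)	((a) < (b) ? (a) : (b))
#define max(a, b)	((a) > (b) ? (a) : (b))
/* clamp defined independently so the assertion is not a tautology */
#define clamp(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

int main(void)
{
	unsigned int v;

	for (v = 0; v < 200; v++)
		assert(clamp(v, 1U << 2, 10U << 3) ==
		       min(max(v, 1U << 2), 10U << 3));
	return 0;
}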
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e37488d3453f..9f314dfa1490 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5391,6 +5391,9 @@ restart:
for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
n = tcp_skb_next(skb, list);
+ if (!skb_frags_readable(skb))
+ goto skip_this;
+
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
skb = tcp_collapse_one(sk, skb, list, root);
@@ -5411,17 +5414,20 @@ restart:
break;
}
- if (n && n != tail && tcp_skb_can_collapse_rx(skb, n) &&
+ if (n && n != tail && skb_frags_readable(n) &&
+ tcp_skb_can_collapse_rx(skb, n) &&
TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
end_of_skbs = false;
break;
}
+skip_this:
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
}
if (end_of_skbs ||
- (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) ||
+ !skb_frags_readable(skb))
return;
__skb_queue_head_init(&tmp);
@@ -5463,7 +5469,8 @@ restart:
if (!skb ||
skb == tail ||
!tcp_skb_can_collapse_rx(nskb, skb) ||
- (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
+ (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) ||
+ !skb_frags_readable(skb))
goto end;
}
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a4e510846905..5afe5e57c89b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -79,6 +79,7 @@
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
#include <linux/btf_ids.h>
+#include <linux/skbuff_ref.h>
#include <crypto/hash.h>
#include <linux/scatterlist.h>
@@ -120,6 +121,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
struct tcp_sock *tp = tcp_sk(sk);
int ts_recent_stamp;
+ if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2)
+ reuse = 0;
+
if (reuse == 2) {
/* Still does not detect *everything* that goes through
* lo, since we require a loopback src or dst address
@@ -1070,7 +1074,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
}
tcp_v4_send_ack(sk, skb,
- tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcptw->tw_snd_nxt, READ_ONCE(tcptw->tw_rcv_nxt),
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent),
@@ -2509,10 +2513,25 @@ static void tcp_md5sig_info_free_rcu(struct rcu_head *head)
}
#endif
+static void tcp_release_user_frags(struct sock *sk)
+{
+#ifdef CONFIG_PAGE_POOL
+ unsigned long index;
+ void *netmem;
+
+ xa_for_each(&sk->sk_user_frags, index, netmem)
+ WARN_ON_ONCE(!napi_pp_put_page((__force netmem_ref)netmem));
+#endif
+}
+
void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ tcp_release_user_frags(sk);
+
+ xa_destroy(&sk->sk_user_frags);
+
trace_tcp_destroy_sock(sk);
tcp_clear_xmit_timers(sk);
@@ -2945,7 +2964,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
- i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
+ i, src, srcp, dest, destp, READ_ONCE(tw->tw_substate), 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b01eb6d94413..95669935494e 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -617,9 +617,13 @@ static struct genl_family tcp_metrics_nl_family;
static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
[TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
- [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr), },
+ [TCP_METRICS_ATTR_ADDR_IPV6] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+
[TCP_METRICS_ATTR_SADDR_IPV4] = { .type = NLA_U32, },
+ [TCP_METRICS_ATTR_SADDR_IPV6] =
+ NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
+
/* Following attributes are not received for GET/DEL,
* we keep them for reference
*/
@@ -811,8 +815,6 @@ static int __parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
if (a) {
struct in6_addr in6;
- if (nla_len(a) != sizeof(struct in6_addr))
- return -EINVAL;
in6 = nla_get_in6_addr(a);
inetpeer_set_addr_v6(addr, &in6);
if (hash)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a19a9dbd3409..bb1fe1ba867a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -52,16 +52,17 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
return TCP_TW_SUCCESS;
}
-static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
+static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq,
+ u32 rcv_nxt)
{
#ifdef CONFIG_TCP_AO
struct tcp_ao_info *ao;
ao = rcu_dereference(tcptw->ao_info);
- if (unlikely(ao && seq < tcptw->tw_rcv_nxt))
+ if (unlikely(ao && seq < rcv_nxt))
WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1);
#endif
- tcptw->tw_rcv_nxt = seq;
+ WRITE_ONCE(tcptw->tw_rcv_nxt, seq);
}
/*
@@ -98,8 +99,9 @@ enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th, u32 *tw_isn)
{
- struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+ u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt);
+ struct tcp_options_received tmp_opt;
bool paws_reject = false;
int ts_recent_stamp;
@@ -117,26 +119,26 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
}
}
- if (tw->tw_substate == TCP_FIN_WAIT2) {
+ if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2) {
/* Just repeat all the checks of tcp_rcv_state_process() */
/* Out of window, send ACK */
if (paws_reject ||
!tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcptw->tw_rcv_nxt,
- tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd))
+ rcv_nxt,
+ rcv_nxt + tcptw->tw_rcv_wnd))
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2);
if (th->rst)
goto kill;
- if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt))
+ if (th->syn && !before(TCP_SKB_CB(skb)->seq, rcv_nxt))
return TCP_TW_RST;
/* Dup ACK? */
if (!th->ack ||
- !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) ||
+ !after(TCP_SKB_CB(skb)->end_seq, rcv_nxt) ||
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
@@ -146,12 +148,13 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* reset.
*/
if (!th->fin ||
- TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1)
+ TCP_SKB_CB(skb)->end_seq != rcv_nxt + 1)
return TCP_TW_RST;
/* FIN arrived, enter true time-wait state. */
- tw->tw_substate = TCP_TIME_WAIT;
- twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq);
+ WRITE_ONCE(tw->tw_substate, TCP_TIME_WAIT);
+ twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq,
+ rcv_nxt);
if (tmp_opt.saw_tstamp) {
WRITE_ONCE(tcptw->tw_ts_recent_stamp,
@@ -182,7 +185,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
*/
if (!paws_reject &&
- (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt &&
+ (TCP_SKB_CB(skb)->seq == rcv_nxt &&
(TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
/* In window segment, it may be only reset or bare ack. */
@@ -229,7 +232,7 @@ kill:
*/
if (th->syn && !th->rst && !th->ack && !paws_reject &&
- (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) ||
+ (after(TCP_SKB_CB(skb)->seq, rcv_nxt) ||
(tmp_opt.saw_tstamp &&
(s32)(READ_ONCE(tcptw->tw_ts_recent) - tmp_opt.rcv_tsval) < 0))) {
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
@@ -625,6 +628,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
+ xa_init_flags(&newsk->sk_user_frags, XA_FLAGS_ALLOC1);
+
return newsk;
}
EXPORT_SYMBOL(tcp_create_openreq_child);
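The tcp_minisocks.c hunks make tw_rcv_nxt and tw_substate safe to read without the timewait lock: writers become WRITE_ONCE(), lockless readers become READ_ONCE(), and tcp_timewait_state_process() snapshots rcv_nxt once up front so every subsequent check sees the same value. The pattern in miniature, as a sketch rather than verbatim kernel code:

	/* writer side, still serialized */
	WRITE_ONCE(tcptw->tw_rcv_nxt, seq);

	/* lockless reader: snapshot once, use the snapshot everywhere */
	u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt);

	if (!tcp_in_window(seq, end_seq, rcv_nxt, rcv_nxt + rcv_wnd))
		send_ack();	/* hypothetical helper, for illustration */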
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 16c48df8df4c..4fd746bd4d54 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2344,7 +2344,8 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (unlikely(TCP_SKB_CB(skb)->eor) ||
tcp_has_tx_tstamp(skb) ||
- !skb_pure_zcopy_same(skb, next))
+ !skb_pure_zcopy_same(skb, next) ||
+ skb_frags_readable(skb) != skb_frags_readable(next))
return false;
len -= skb->len;
@@ -3264,6 +3265,8 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
return false;
if (skb_cloned(skb))
return false;
+ if (!skb_frags_readable(skb))
+ return false;
/* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
@@ -3649,7 +3652,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority,
/* skb of trace_tcp_send_reset() keeps the skb that caused RST,
* skb here is different to the troublesome skb, so use NULL
*/
- trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED);
+ trace_tcp_send_reset(sk, NULL, reason);
}
/* Send a crossed SYN-ACK during socket establishment.
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4d40615dc8fc..79064580c8c0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -125,7 +125,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
do_reset = true;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_ABORT_ON_MEMORY);
tcp_done(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
@@ -282,6 +282,7 @@ static int tcp_write_timeout(struct sock *sk)
expired = retransmits_timed_out(sk, retry_until,
READ_ONCE(icsk->icsk_user_timeout));
tcp_fastopen_active_detect_blackhole(sk, expired);
+ mptcp_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
@@ -779,7 +780,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
goto out;
}
}
- tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED);
+ tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE);
goto death;
}
@@ -807,7 +808,7 @@ static void tcp_keepalive_timer (struct timer_list *t)
(user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC,
- SK_RST_REASON_NOT_SPECIFIED);
+ SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT);
tcp_write_err(sk);
goto out;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 49c622e743e8..8accbf4cb295 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -115,6 +115,7 @@
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
#include <net/gro.h>
+#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -365,7 +366,7 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
-static int compute_score(struct sock *sk, struct net *net,
+static int compute_score(struct sock *sk, const struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum,
int dif, int sdif)
@@ -420,7 +421,7 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport,
}
/* called with rcu_read_lock() */
-static struct sock *udp4_lib_lookup2(struct net *net,
+static struct sock *udp4_lib_lookup2(const struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum,
int dif, int sdif,
@@ -480,7 +481,7 @@ rescore:
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
-struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
@@ -561,7 +562,7 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
-struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
+struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
struct sock *sk;
@@ -2618,7 +2619,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
if (!inet_sk(sk)->inet_daddr && in_dev)
return ip_mc_validate_source(skb, iph->daddr,
iph->saddr,
- iph->tos & IPTOS_RT_MASK,
+ iph->tos & INET_DSCP_MASK,
skb->dev, in_dev, &itag);
}
return 0;
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index e4e0fa869fa4..619a53eb672d 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -6,6 +6,7 @@
#include <net/dst_metadata.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
+#include <net/inet_dscp.h>
int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket **sockp)
@@ -232,7 +233,7 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
fl4.saddr = key->u.ipv4.src;
fl4.fl4_dport = dport;
fl4.fl4_sport = sport;
- fl4.flowi4_tos = RT_TOS(tos);
+ fl4.flowi4_tos = tos & INET_DSCP_MASK;
fl4.flowi4_flags = key->flow_flags;
rt = ip_route_output_key(net, &fl4);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f70d8757af1a..d680beb91b0a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -92,8 +92,6 @@
#include <linux/export.h>
#include <linux/ioam6.h>
-#define INFINITY_LIFE_TIME 0xFFFFFFFF
-
#define IPV6_MAX_STRLEN \
sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
@@ -239,6 +237,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -302,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -2762,6 +2762,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
+ bool ignore_autoconf = false;
pinfo = (struct prefix_info *) opt;
@@ -2864,7 +2865,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
/* Try to figure out our local address for this prefix */
- if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+ ignore_autoconf = READ_ONCE(in6_dev->cnf.ra_honor_pio_pflag) && pinfo->preferpd;
+ if (pinfo->autoconf && in6_dev->cnf.autoconf && !ignore_autoconf) {
struct in6_addr addr;
bool tokenized = false, dev_addr_generated = false;
@@ -5617,8 +5619,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
@@ -6173,8 +6174,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
}
static inline size_t inet6_prefix_nlmsg_size(void)
@@ -6241,8 +6241,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
}
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -6926,6 +6925,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "ra_honor_pio_pflag",
+ .data = &ipv6_devconf.ra_honor_pio_pflag,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_IPV6_ROUTER_PREF
{
.procname = "accept_ra_rtr_pref",
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 90d2c7e3f5e9..ba69b86f1c7d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -708,6 +708,7 @@ const struct proto_ops inet6_stream_ops = {
.splice_eof = inet_splice_eof,
.sendmsg_locked = tcp_sendmsg_locked,
.splice_read = tcp_splice_read,
+ .set_peek_off = sk_set_peek_off,
.read_sock = tcp_read_sock,
.read_skb = tcp_read_skb,
.peek_len = tcp_peek_len,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 3920e8aa1031..b2400c226a32 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -132,7 +132,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- skb_page_unref(sg_page(sg), skb->pp_recycle);
+ skb_page_unref(page_to_netmem(sg_page(sg)),
+ skb->pp_recycle);
}
#ifdef CONFIG_INET6_ESPINTCP
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 9e254de7462f..04a9ed5e8310 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -27,6 +27,7 @@ struct fib6_rule {
struct rt6key src;
struct rt6key dst;
dscp_t dscp;
+ u8 dscp_full:1; /* DSCP or TOS selector */
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
@@ -345,6 +346,20 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 1;
}
+static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ if (rule6->dscp) {
+ NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
+ return -EINVAL;
+ }
+
+ rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule6->dscp_full = true;
+
+ return 0;
+}
+
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -361,6 +376,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+ if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
+ goto errout;
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -413,9 +431,19 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;
- if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
+ if (frh->tos &&
+ (rule6->dscp_full ||
+ inet_dscp_to_dsfield(rule6->dscp) != frh->tos))
return 0;
+ if (tb[FRA_DSCP]) {
+ dscp_t dscp;
+
+ dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
+ if (!rule6->dscp_full || rule6->dscp != dscp)
+ return 0;
+ }
+
if (frh->src_len &&
nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr)))
return 0;
@@ -434,7 +462,15 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
- frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+
+ if (rule6->dscp_full) {
+ frh->tos = 0;
+ if (nla_put_u8(skb, FRA_DSCP,
+ inet_dscp_to_dsfield(rule6->dscp) >> 2))
+ goto nla_put_failure;
+ } else {
+ frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+ }
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -450,7 +486,8 @@ nla_put_failure:
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
- + nla_total_size(16); /* src */
+ + nla_total_size(16) /* src */
+ + nla_total_size(1); /* dscp */
}
static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7b31674644ef..071b0bc1179d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
return false;
}
-static bool icmpv6_global_allow(struct net *net, int type)
+static bool icmpv6_global_allow(struct net *net, int type,
+ bool *apply_ratelimit)
{
if (icmpv6_mask_allow(net, type))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
* Check the ICMP output rate limit
*/
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
- struct flowi6 *fl6)
+ struct flowi6 *fl6, bool apply_ratelimit)
{
struct net *net = sock_net(sk);
struct dst_entry *dst;
bool res = false;
- if (icmpv6_mask_allow(net, type))
+ if (!apply_ratelimit)
return true;
/*
@@ -228,6 +230,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
dst_release(dst);
return res;
}
@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
struct net *net;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
+ bool apply_ratelimit = false;
struct dst_entry *dst;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
@@ -533,11 +538,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
return;
}
- /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
+ /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
+ if (!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, type, &apply_ratelimit))
goto out_bh_enable;
mip6_addr_swap(skb, parm);
@@ -575,7 +581,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
- if (!icmpv6_xrlim_allow(sk, type, &fl6))
+ if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
goto out;
tmp_hdr.icmp6_type = type;
@@ -717,6 +723,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
struct icmp6hdr *icmph = icmp6_hdr(skb);
+ bool apply_ratelimit = false;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
@@ -781,8 +788,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
goto out;
/* Check the ratelimit */
- if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
- !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+ if ((!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
goto out_dst_release;
idev = __in6_dev_get(skb->dev);
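These ICMPv6 hunks mirror the IPv4 side of the series: icmpv6_global_allow() now only checks the global budget (recording the result in apply_ratelimit), and the token is consumed via icmp_global_consume() only after the per-destination check in icmpv6_xrlim_allow() also passes, so rate-limited destinations no longer burn global budget for packets that are never sent. A minimal userspace sketch of the check-then-consume pattern, assuming a trivial token bucket:

#include <stdbool.h>
#include <stdio.h>

struct bucket { int tokens; };

/* Check only: do not take the token yet. */
static bool global_allow(const struct bucket *b)
{
	return b->tokens > 0;
}

/* Consume only once the packet will really be sent. */
static void global_consume(struct bucket *b)
{
	if (b->tokens > 0)
		b->tokens--;
}

static bool send_icmp(struct bucket *b, bool per_dst_ok)
{
	if (!global_allow(b))
		return false;
	if (!per_dst_ok)
		return false;	/* global budget untouched */
	global_consume(b);
	return true;
}

int main(void)
{
	struct bucket b = { .tokens = 1 };

	printf("%d\n", send_icmp(&b, false));	/* 0: token kept */
	printf("%d\n", send_icmp(&b, true));	/* 1: token used */
	return 0;
}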
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 6db71bb1cd30..9ec05e354baa 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(inet6_ehashfn);
*
* The sockhash lock must be held as a reader here.
*/
-struct sock *__inet6_lookup_established(struct net *net,
+struct sock *__inet6_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const __be16 sport,
@@ -89,7 +89,7 @@ found:
}
EXPORT_SYMBOL(__inet6_lookup_established);
-static inline int compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, const struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
const int dif, const int sdif)
@@ -126,7 +126,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
* Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
* the selected sock or an error.
*/
-struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
__be16 sport,
@@ -147,7 +147,7 @@ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
-static struct sock *inet6_lhash2_lookup(struct net *net,
+static struct sock *inet6_lhash2_lookup(const struct net *net,
struct inet_listen_hashbucket *ilb2,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -174,7 +174,7 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -199,7 +199,7 @@ struct sock *inet6_lookup_run_sk_lookup(struct net *net,
}
EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
-struct sock *inet6_lookup_listener(struct net *net,
+struct sock *inet6_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -243,7 +243,8 @@ done:
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
-struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+struct sock *inet6_lookup(const struct net *net,
+ struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index bf7120ecea1e..beb6b4cfc551 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -42,8 +42,10 @@ struct ioam6_lwt {
struct ioam6_lwt_freq freq;
atomic_t pkt_cnt;
u8 mode;
+ bool has_tunsrc;
+ struct in6_addr tunsrc;
struct in6_addr tundst;
- struct ioam6_lwt_encap tuninfo;
+ struct ioam6_lwt_encap tuninfo;
};
static const struct netlink_range_validation freq_range = {
@@ -72,8 +74,10 @@ static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
[IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
IOAM6_IPTUNNEL_MODE_MIN,
IOAM6_IPTUNNEL_MODE_MAX),
+ [IOAM6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
- [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(
+ sizeof(struct ioam6_trace_hdr)),
};
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
@@ -85,7 +89,7 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
- trace->type.bit21)
+ trace->type.bit21 | trace->type.bit23)
return false;
trace->nodelen = 0;
@@ -143,6 +147,11 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
else
mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
+ if (tb[IOAM6_IPTUNNEL_SRC] && mode == IOAM6_IPTUNNEL_MODE_INLINE) {
+ NL_SET_ERR_MSG(extack, "no tunnel src expected with this mode");
+ return -EINVAL;
+ }
+
if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
return -EINVAL;
@@ -167,19 +176,40 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
ilwt = ioam6_lwt_state(lwt);
err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
- if (err) {
- kfree(lwt);
- return err;
- }
+ if (err)
+ goto free_lwt;
atomic_set(&ilwt->pkt_cnt, 0);
ilwt->freq.k = freq_k;
ilwt->freq.n = freq_n;
ilwt->mode = mode;
- if (tb[IOAM6_IPTUNNEL_DST])
+
+ if (!tb[IOAM6_IPTUNNEL_SRC]) {
+ ilwt->has_tunsrc = false;
+ } else {
+ ilwt->has_tunsrc = true;
+ ilwt->tunsrc = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_SRC]);
+
+ if (ipv6_addr_any(&ilwt->tunsrc)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_SRC],
+ "invalid tunnel source address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
+ if (tb[IOAM6_IPTUNNEL_DST]) {
ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
+ if (ipv6_addr_any(&ilwt->tundst)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_DST],
+ "invalid tunnel dest address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
tuninfo = ioam6_lwt_info(lwt);
tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
tuninfo->pad[0] = IPV6_TLV_PADN;
@@ -201,6 +231,11 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
*ts = lwt;
return 0;
+free_cache:
+ dst_cache_destroy(&ilwt->cache);
+free_lwt:
+ kfree(lwt);
+ return err;
}
static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
@@ -256,6 +291,8 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
+ bool has_tunsrc,
+ struct in6_addr *tunsrc,
struct in6_addr *tundst)
{
struct dst_entry *dst = skb_dst(skb);
@@ -285,8 +322,12 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdr->nexthdr = NEXTHDR_HOP;
hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
hdr->daddr = *tundst;
- ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
- IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
+
+ if (has_tunsrc)
+ memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
+ else
+ ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+ IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
skb_postpush_rcsum(skb, hdr, len);
@@ -328,7 +369,9 @@ do_inline:
case IOAM6_IPTUNNEL_MODE_ENCAP:
do_encap:
/* Encapsulation (ip6ip6) */
- err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
+ err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
+ ilwt->has_tunsrc, &ilwt->tunsrc,
+ &ilwt->tundst);
if (unlikely(err))
goto drop;
@@ -414,6 +457,13 @@ static int ioam6_fill_encap_info(struct sk_buff *skb,
goto ret;
if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc) {
+ err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_SRC,
+ &ilwt->tunsrc);
+ if (err)
+ goto ret;
+ }
+
err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
if (err)
goto ret;
@@ -435,8 +485,12 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
nla_total_size(sizeof(ilwt->mode)) +
nla_total_size(sizeof(ilwt->tuninfo.traceh));
- if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
+ if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc)
+ nlsize += nla_total_size(sizeof(ilwt->tunsrc));
+
nlsize += nla_total_size(sizeof(ilwt->tundst));
+ }
return nlsize;
}
@@ -451,17 +505,21 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
return (ilwt_a->freq.k != ilwt_b->freq.k ||
ilwt_a->freq.n != ilwt_b->freq.n ||
ilwt_a->mode != ilwt_b->mode ||
+ ilwt_a->has_tunsrc != ilwt_b->has_tunsrc ||
(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
!ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
+ (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
+ ilwt_a->has_tunsrc &&
+ !ipv6_addr_equal(&ilwt_a->tunsrc, &ilwt_b->tunsrc)) ||
trace_a->namespace_id != trace_b->namespace_id);
}
static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
.build_state = ioam6_build_state,
.destroy_state = ioam6_destroy_state,
- .output = ioam6_output,
+ .output = ioam6_output,
.fill_encap = ioam6_fill_encap_info,
- .get_encap_size = ioam6_encap_nlsize,
+ .get_encap_size = ioam6_encap_nlsize,
.cmp_encap = ioam6_encap_cmp,
.owner = THIS_MODULE,
};
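With IOAM6_IPTUNNEL_SRC configured, encap mode copies a fixed, validated source address into the outer header instead of resolving one per packet with ipv6_dev_get_saddr(). Condensed from ioam6_do_encap() above (struct assignment used here in place of the memcpy, for brevity):

	if (has_tunsrc)
		hdr->saddr = *tunsrc;	/* pinned, configured source */
	else
		ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
				   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);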
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3942bd2ade78..235808cfec70 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1471,7 +1471,7 @@ static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
- dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
+ dev->features |= GRE6_FEATURES;
dev->hw_features |= GRE6_FEATURES;
/* TCP offload with GRE SEQ is not supported, nor can we support 2
@@ -1485,6 +1485,8 @@ static void ip6gre_tnl_init_features(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ip6gre_tunnel_init_common(struct net_device *dev)
@@ -1619,8 +1621,7 @@ static int __net_init ip6gre_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
-
+ ign->fb_tunnel_dev->netns_local = true;
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 133610a49da6..70c0e16c0ae6 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,9 +111,8 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
@@ -327,9 +326,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 87dfb565a9f8..b60e13c42bca 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -53,6 +53,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -608,7 +609,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
- 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ 0, 0, 0, IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt))
goto out;
@@ -619,7 +621,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rt->rt_flags & RTCF_LOCAL) {
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, 0, 0,
- IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
@@ -1849,7 +1852,7 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL6;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
@@ -2258,7 +2261,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip6n->fb_tnl_dev->netns_local = true;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index dd342e6ecf3f..2ce4ae0d8dc3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -640,7 +640,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
}
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -2431,8 +2431,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
errout:
kfree_skb(skb);
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index cd342d5015c6..1e225e6489ea 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -985,7 +985,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return ip_setsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
@@ -1475,7 +1475,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return ip_getsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7ba01d8cfbae..b244dbf61d5f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -586,7 +586,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
struct ip6_sf_socklist *psl;
- int i, count, copycount;
+ unsigned int count;
+ int i, copycount;
group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
@@ -610,7 +611,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
psl = sock_dereference(pmc->sflist, sk);
count = psl ? psl->sl_count : 0;
- copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+ copycount = min(count, gsf->gf_numsrc);
gsf->gf_numsrc = count;
for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b8eec1b6cc2c..aba94a348673 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -200,9 +200,9 @@ static inline int ndisc_is_useropt(const struct net_device *dev,
return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
+ opt->nd_opt_type == ND_OPT_6CO ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
- opt->nd_opt_type == ND_OPT_PREF64 ||
- ndisc_ops_is_useropt(dev, opt->nd_opt_type);
+ opt->nd_opt_type == ND_OPT_PREF64;
}
static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
@@ -1944,7 +1944,7 @@ static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
static char warncomm[TASK_COMM_LEN];
static int warned;
if (strcmp(warncomm, current->comm) && warned < 5) {
- strcpy(warncomm, current->comm);
+ strscpy(warncomm, current->comm);
pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
warncomm, func,
dev_name, ctl->procname,
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 131f7bb2110d..7d5602950ae7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1773,7 +1773,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index c82f3fdd4a65..492a811828a7 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -38,13 +38,13 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
sizeof(struct in6_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV])
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV],
&priv->sreg_dev, sizeof(int));
return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 219701caba1e..b4251915585f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -174,7 +174,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
struct net_device *rt_dev = rt->dst.dev;
bool handled = false;
- if (rt_idev->dev == dev) {
+ if (rt_idev && rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(blackhole_netdev);
in6_dev_put(rt_idev);
handled = true;
@@ -6193,8 +6193,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_rt_update(struct net *net, struct fib6_info *rt,
@@ -6220,8 +6219,7 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 2c83b7586422..db3c19a42e1c 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -263,10 +263,8 @@ static int rpl_input(struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
err = rpl_do_srh(skb, rlwt);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ if (unlikely(err))
+ goto drop;
local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
@@ -286,9 +284,13 @@ static int rpl_input(struct sk_buff *skb)
err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
- return err;
+ goto drop;
return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
}
static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 83b195f09561..39bd8951bfca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -51,6 +51,7 @@
#include <net/dsfield.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/inet_dscp.h>
/*
This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -935,8 +936,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
- RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
- 0, dst, tiph->saddr, 0, 0,
+ tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE,
+ IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0,
sock_net_uid(tunnel->net, NULL));
rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
@@ -1111,7 +1112,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
iph->daddr, iph->saddr,
0, 0,
IPPROTO_IPV6,
- RT_TOS(iph->tos),
+ iph->tos & INET_DSCP_MASK,
tunnel->parms.link);
if (!IS_ERR(rt)) {
@@ -1435,7 +1436,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->features |= SIT_FEATURES;
dev->hw_features |= SIT_FEATURES;
dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
@@ -1855,7 +1856,7 @@ static int __net_init sit_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ sitn->fb_tunnel_dev->netns_local = true;
err = register_netdev(sitn->fb_tunnel_dev);
if (err)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 200fea92f12f..d71ab4e1efe1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1193,7 +1193,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
#endif
}
- tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
+ READ_ONCE(tcptw->tw_rcv_nxt),
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
@@ -2258,7 +2259,7 @@ static void get_timewait6_sock(struct seq_file *seq,
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
- tw->tw_substate, 0, 0,
+ READ_ONCE(tw->tw_substate), 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6602a2e9cdb5..52dfbb2ff1a8 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -114,7 +114,7 @@ void udp_v6_rehash(struct sock *sk)
udp_lib_rehash(sk, new_hash);
}
-static int compute_score(struct sock *sk, struct net *net,
+static int compute_score(struct sock *sk, const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum,
int dif, int sdif)
@@ -160,7 +160,7 @@ static int compute_score(struct sock *sk, struct net *net,
}
/* called with rcu_read_lock() */
-static struct sock *udp6_lib_lookup2(struct net *net,
+static struct sock *udp6_lib_lookup2(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum,
int dif, int sdif, struct udp_hslot *hslot2,
@@ -217,7 +217,7 @@ rescore:
}
/* rcu_read_lock() must be held */
-struct sock *__udp6_lib_lookup(struct net *net,
+struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *udptable,
@@ -300,7 +300,7 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
-struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
+struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
struct sock *sk;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 2e86f520f799..3eec23ac5ab1 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -117,12 +117,12 @@ struct l2tp_net {
struct hlist_head l2tp_v3_session_htable[16];
};
-static inline u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
+static u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id)
{
return ((u32)tunnel_id) << 16 | session_id;
}
-static inline unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
+static unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id)
{
return ((unsigned long)sk) + session_id;
}
@@ -135,63 +135,81 @@ static bool l2tp_sk_is_v6(struct sock *sk)
}
#endif
-static inline struct l2tp_net *l2tp_pernet(const struct net *net)
+static struct l2tp_net *l2tp_pernet(const struct net *net)
{
return net_generic(net, l2tp_net_id);
}
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
{
+ struct sock *sk = tunnel->sock;
+
trace_free_tunnel(tunnel);
- sock_put(tunnel->sock);
- /* the tunnel is freed in the socket destructor */
+
+ if (sk) {
+ /* Disable udp encapsulation */
+ switch (tunnel->encap) {
+ case L2TP_ENCAPTYPE_UDP:
+ /* No longer an encapsulation socket. See net/ipv4/udp.c */
+ WRITE_ONCE(udp_sk(sk)->encap_type, 0);
+ udp_sk(sk)->encap_rcv = NULL;
+ udp_sk(sk)->encap_destroy = NULL;
+ break;
+ case L2TP_ENCAPTYPE_IP:
+ break;
+ }
+
+ tunnel->sock = NULL;
+ sock_put(sk);
+ }
+
+ kfree_rcu(tunnel, rcu);
}
static void l2tp_session_free(struct l2tp_session *session)
{
trace_free_session(session);
if (session->tunnel)
- l2tp_tunnel_dec_refcount(session->tunnel);
- kfree(session);
+ l2tp_tunnel_put(session->tunnel);
+ kfree_rcu(session, rcu);
}
-struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk)
+struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk)
{
- struct l2tp_tunnel *tunnel = sk->sk_user_data;
+ const struct net *net = sock_net(sk);
+ unsigned long tunnel_id, tmp;
+ struct l2tp_tunnel *tunnel;
+ struct l2tp_net *pn;
- if (tunnel)
- if (WARN_ON(tunnel->magic != L2TP_TUNNEL_MAGIC))
- return NULL;
+ rcu_read_lock_bh();
+ pn = l2tp_pernet(net);
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel &&
+ tunnel->sock == sk &&
+ refcount_inc_not_zero(&tunnel->ref_count)) {
+ rcu_read_unlock_bh();
+ return tunnel;
+ }
+ }
+ rcu_read_unlock_bh();
- return tunnel;
+ return NULL;
}
EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel);
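Since l2tp_sk_to_tunnel() now takes a reference itself (refcount_inc_not_zero() under RCU) instead of trusting sk_user_data, every successful call must be paired with l2tp_tunnel_put(). A minimal caller sketch (demo_use_tunnel is a hypothetical name):

/* Sketch: balance the reference taken by l2tp_sk_to_tunnel(). */
static void demo_use_tunnel(struct sock *sk)
{
	struct l2tp_tunnel *tunnel;

	tunnel = l2tp_sk_to_tunnel(sk);
	if (!tunnel)
		return;

	pr_debug("sk %p belongs to tunnel %u\n", sk, tunnel->tunnel_id);
	l2tp_tunnel_put(tunnel);
}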
-void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
-{
- refcount_inc(&tunnel->ref_count);
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_inc_refcount);
-
-void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_put(struct l2tp_tunnel *tunnel)
{
if (refcount_dec_and_test(&tunnel->ref_count))
l2tp_tunnel_free(tunnel);
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_dec_refcount);
+EXPORT_SYMBOL_GPL(l2tp_tunnel_put);
-void l2tp_session_inc_refcount(struct l2tp_session *session)
-{
- refcount_inc(&session->ref_count);
-}
-EXPORT_SYMBOL_GPL(l2tp_session_inc_refcount);
-
-void l2tp_session_dec_refcount(struct l2tp_session *session)
+void l2tp_session_put(struct l2tp_session *session)
{
if (refcount_dec_and_test(&session->ref_count))
l2tp_session_free(session);
}
-EXPORT_SYMBOL_GPL(l2tp_session_dec_refcount);
+EXPORT_SYMBOL_GPL(l2tp_session_put);
/* Lookup a tunnel. A new reference is held on the returned tunnel. */
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
@@ -211,26 +229,27 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
-struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
+struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key)
{
struct l2tp_net *pn = l2tp_pernet(net);
- unsigned long tunnel_id, tmp;
- struct l2tp_tunnel *tunnel;
- int count = 0;
+ struct l2tp_tunnel *tunnel = NULL;
rcu_read_lock_bh();
- idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
- if (tunnel && ++count > nth &&
- refcount_inc_not_zero(&tunnel->ref_count)) {
+again:
+ tunnel = idr_get_next_ul(&pn->l2tp_tunnel_idr, key);
+ if (tunnel) {
+ if (refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
}
+ (*key)++;
+ goto again;
}
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_get_nth);
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get_next);
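l2tp_tunnel_get_next() turns the old get_nth walk (which restarted from entry 0 on every call, quadratic across a full dump) into a resumable IDR cursor. The function leaves *key at the index of the tunnel it returned, so callers advance it themselves, exactly as the debugfs and netlink dump code further down does. A sketch of a full walk (demo_walk_tunnels is hypothetical):

/* Sketch: visit every tunnel in a namespace with the cursor API. */
static void demo_walk_tunnels(const struct net *net)
{
	struct l2tp_tunnel *tunnel;
	unsigned long key = 0;

	while ((tunnel = l2tp_tunnel_get_next(net, &key)) != NULL) {
		pr_debug("tunnel %u at key %lu\n", tunnel->tunnel_id, key);
		l2tp_tunnel_put(tunnel);
		key++;		/* step past the entry just returned */
	}
}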
struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id)
{
@@ -254,7 +273,15 @@ struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk,
hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
hlist, key) {
- if (session->tunnel->sock == sk &&
+ /* session->tunnel may be NULL if another thread is in
+ * l2tp_session_register and has added an item to
+ * l2tp_v3_session_htable but hasn't yet added the
+ * session to its tunnel's session_list.
+ */
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (session->session_id == session_id &&
+ tunnel && tunnel->sock == sk &&
refcount_inc_not_zero(&session->ref_count)) {
rcu_read_unlock_bh();
return session;
@@ -295,24 +322,109 @@ struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, in
}
EXPORT_SYMBOL_GPL(l2tp_session_get);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth)
+static struct l2tp_session *l2tp_v2_session_get_next(const struct net *net,
+ u16 tid,
+ unsigned long *key)
{
- struct l2tp_session *session;
- int count = 0;
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_session *session = NULL;
+
+ /* Start searching within the range of the tid */
+ if (*key == 0)
+ *key = l2tp_v2_session_key(tid, 0);
rcu_read_lock_bh();
- list_for_each_entry_rcu(session, &tunnel->session_list, list) {
- if (++count > nth) {
- l2tp_session_inc_refcount(session);
+again:
+ session = idr_get_next_ul(&pn->l2tp_v2_session_idr, key);
+ if (session) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ /* ignore sessions with id 0 as they are internal for pppol2tp */
+ if (session->session_id == 0) {
+ (*key)++;
+ goto again;
+ }
+
+ if (tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
rcu_read_unlock_bh();
return session;
}
+
+ (*key)++;
+ if (tunnel->tunnel_id == tid)
+ goto again;
}
rcu_read_unlock_bh();
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
+
+static struct l2tp_session *l2tp_v3_session_get_next(const struct net *net,
+ u32 tid, struct sock *sk,
+ unsigned long *key)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_session *session = NULL;
+
+ rcu_read_lock_bh();
+again:
+ session = idr_get_next_ul(&pn->l2tp_v3_session_idr, key);
+ if (session && !hash_hashed(&session->hlist)) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (tunnel && tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
+
+ (*key)++;
+ goto again;
+ }
+
+ /* If we get here and session is non-NULL, the IDR entry may be one
+ * where the session_id collides with one in another tunnel. Check
+ * session_htable for a match. There can only be one session of a given
+ * ID per tunnel so we can return as soon as a match is found.
+ */
+ if (session && hash_hashed(&session->hlist)) {
+ unsigned long hkey = l2tp_v3_session_hashkey(sk, session->session_id);
+ u32 sid = session->session_id;
+
+ hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session,
+ hlist, hkey) {
+ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel);
+
+ if (session->session_id == sid &&
+ tunnel && tunnel->tunnel_id == tid &&
+ refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock_bh();
+ return session;
+ }
+ }
+
+ /* If no match found, the colliding session ID isn't in our
+ * tunnel so try the next session ID.
+ */
+ (*key)++;
+ goto again;
+ }
+
+ rcu_read_unlock_bh();
+
+ return NULL;
+}
+
+struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, unsigned long *key)
+{
+ if (pver == L2TP_HDR_VER_2)
+ return l2tp_v2_session_get_next(net, tunnel_id, key);
+ else
+ return l2tp_v3_session_get_next(net, tunnel_id, sk, key);
+}
+EXPORT_SYMBOL_GPL(l2tp_session_get_next);
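The session iterator follows the same cursor convention, dispatching on protocol version since v2 and v3 sessions live in different IDRs. A sketch of dumping one tunnel's sessions (demo_walk_sessions is hypothetical; the argument pattern matches the debugfs caller below):

/* Sketch: visit every session belonging to one tunnel. */
static void demo_walk_sessions(const struct net *net, struct l2tp_tunnel *tunnel)
{
	struct l2tp_session *session;
	unsigned long key = 0;

	while ((session = l2tp_session_get_next(net, tunnel->sock,
						tunnel->version,
						tunnel->tunnel_id, &key))) {
		pr_debug("session %u in tunnel %u\n",
			 session->session_id, tunnel->tunnel_id);
		l2tp_session_put(session);
		key++;
	}
}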
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
@@ -330,7 +442,7 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
if (tunnel) {
list_for_each_entry_rcu(session, &tunnel->session_list, list) {
if (!strcmp(session->ifname, ifname)) {
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
rcu_read_unlock_bh();
return session;
@@ -347,7 +459,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist,
struct l2tp_session *session)
{
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
WARN_ON_ONCE(session->coll_list);
session->coll_list = clist;
spin_lock(&clist->lock);
@@ -387,12 +499,12 @@ static int l2tp_session_collision_add(struct l2tp_net *pn,
/* If existing session isn't already in the session hlist, add it. */
if (!hash_hashed(&session2->hlist))
- hash_add(pn->l2tp_v3_session_htable, &session2->hlist,
- session2->hlist_key);
+ hash_add_rcu(pn->l2tp_v3_session_htable, &session2->hlist,
+ session2->hlist_key);
/* Add new session to the hlist and collision list */
- hash_add(pn->l2tp_v3_session_htable, &session1->hlist,
- session1->hlist_key);
+ hash_add_rcu(pn->l2tp_v3_session_htable, &session1->hlist,
+ session1->hlist_key);
refcount_inc(&clist->ref_count);
l2tp_session_coll_list_add(clist, session1);
@@ -408,7 +520,7 @@ static void l2tp_session_collision_del(struct l2tp_net *pn,
lockdep_assert_held(&pn->l2tp_session_idr_lock);
- hash_del(&session->hlist);
+ hash_del_rcu(&session->hlist);
if (clist) {
/* Remove session from its collision list. If there
@@ -433,7 +545,7 @@ static void l2tp_session_collision_del(struct l2tp_net *pn,
spin_unlock(&clist->lock);
if (refcount_dec_and_test(&clist->ref_count))
kfree(clist);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
}
}
@@ -442,6 +554,7 @@ int l2tp_session_register(struct l2tp_session *session,
{
struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
struct l2tp_session *other_session = NULL;
+ void *old = NULL;
u32 session_key;
int err;
@@ -481,16 +594,23 @@ int l2tp_session_register(struct l2tp_session *session,
goto out;
}
- l2tp_tunnel_inc_refcount(tunnel);
- list_add(&session->list, &tunnel->session_list);
+ refcount_inc(&tunnel->ref_count);
+ WRITE_ONCE(session->tunnel, tunnel);
+ list_add_rcu(&session->list, &tunnel->session_list);
+ /* this makes session available to lockless getters */
if (tunnel->version == L2TP_HDR_VER_3) {
if (!other_session)
- idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
+ old = idr_replace(&pn->l2tp_v3_session_idr, session, session_key);
} else {
- idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
+ old = idr_replace(&pn->l2tp_v2_session_idr, session, session_key);
}
+ /* old should be NULL, unless something removed or modified
+ * the IDR entry after our idr_alloc_u32 above (which shouldn't
+ * happen).
+ */
+ WARN_ON_ONCE(old);
out:
spin_unlock_bh(&pn->l2tp_session_idr_lock);
spin_unlock_bh(&tunnel->list_lock);
@@ -797,7 +917,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
if (!session->lns_mode && !session->send_seq) {
trace_session_seqnum_lns_enable(session);
session->send_seq = 1;
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version,
+ tunnel->encap);
}
} else {
/* No sequence numbers.
@@ -818,7 +939,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
if (!session->lns_mode && session->send_seq) {
trace_session_seqnum_lns_disable(session);
session->send_seq = 0;
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version,
+ tunnel->encap);
} else if (session->send_seq) {
pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
session->name);
@@ -955,7 +1077,7 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!session || !session->recv_skb) {
if (session)
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
/* Not found? Pass to userspace to deal with */
goto pass;
@@ -969,12 +1091,12 @@ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (version == L2TP_HDR_VER_3 &&
l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto invalid;
}
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -1218,44 +1340,6 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
* Tunnel and session create/destroy.
*****************************************************************************/
-/* Tunnel socket destruct hook.
- * The tunnel context is deleted only when all session sockets have been
- * closed.
- */
-static void l2tp_tunnel_destruct(struct sock *sk)
-{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
-
- if (!tunnel)
- goto end;
-
- /* Disable udp encapsulation */
- switch (tunnel->encap) {
- case L2TP_ENCAPTYPE_UDP:
- /* No longer an encapsulation socket. See net/ipv4/udp.c */
- WRITE_ONCE(udp_sk(sk)->encap_type, 0);
- udp_sk(sk)->encap_rcv = NULL;
- udp_sk(sk)->encap_destroy = NULL;
- break;
- case L2TP_ENCAPTYPE_IP:
- break;
- }
-
- /* Remove hooks into tunnel socket */
- write_lock_bh(&sk->sk_callback_lock);
- sk->sk_destruct = tunnel->old_sk_destruct;
- sk->sk_user_data = NULL;
- write_unlock_bh(&sk->sk_callback_lock);
-
- /* Call the original destructor */
- if (sk->sk_destruct)
- (*sk->sk_destruct)(sk);
-
- kfree_rcu(tunnel, rcu);
-end:
- return;
-}
-
/* Remove an l2tp session from l2tp_core's lists. */
static void l2tp_session_unhash(struct l2tp_session *session)
{
@@ -1288,8 +1372,6 @@ static void l2tp_session_unhash(struct l2tp_session *session)
spin_unlock_bh(&pn->l2tp_session_idr_lock);
spin_unlock_bh(&tunnel->list_lock);
-
- synchronize_rcu();
}
}
@@ -1301,28 +1383,21 @@ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
spin_lock_bh(&tunnel->list_lock);
tunnel->acpt_newsess = false;
- for (;;) {
- session = list_first_entry_or_null(&tunnel->session_list,
- struct l2tp_session, list);
- if (!session)
- break;
- l2tp_session_inc_refcount(session);
- list_del_init(&session->list);
- spin_unlock_bh(&tunnel->list_lock);
+ list_for_each_entry(session, &tunnel->session_list, list)
l2tp_session_delete(session);
- spin_lock_bh(&tunnel->list_lock);
- l2tp_session_dec_refcount(session);
- }
spin_unlock_bh(&tunnel->list_lock);
}
/* Tunnel socket destroy hook for UDP encapsulation */
static void l2tp_udp_encap_destroy(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel;
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
@@ -1356,10 +1431,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
/* drop initial ref */
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
/* drop workqueue ref */
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
}
/* Create a socket for the tunnel, if one isn't set up by
@@ -1505,7 +1580,6 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
tunnel->tunnel_id = tunnel_id;
tunnel->peer_tunnel_id = peer_tunnel_id;
- tunnel->magic = L2TP_TUNNEL_MAGIC;
sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
spin_lock_init(&tunnel->list_lock);
tunnel->acpt_newsess = true;
@@ -1531,6 +1605,8 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
enum l2tp_encap_type encap)
{
+ struct l2tp_tunnel *tunnel;
+
if (!net_eq(sock_net(sk), net))
return -EINVAL;
@@ -1544,8 +1620,14 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
(encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
return -EPROTONOSUPPORT;
- if (sk->sk_user_data)
+ if (encap == L2TP_ENCAPTYPE_UDP && sk->sk_user_data)
+ return -EBUSY;
+
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
+ l2tp_tunnel_put(tunnel);
return -EBUSY;
+ }
return 0;
}
@@ -1584,12 +1666,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
ret = l2tp_validate_socket(sk, net, tunnel->encap);
if (ret < 0)
goto err_inval_sock;
- rcu_assign_sk_user_data(sk, tunnel);
write_unlock_bh(&sk->sk_callback_lock);
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
- .sk_user_data = tunnel,
.encap_type = UDP_ENCAP_L2TPINUDP,
.encap_rcv = l2tp_udp_encap_recv,
.encap_err_rcv = l2tp_udp_encap_err_recv,
@@ -1599,8 +1679,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
setup_udp_tunnel_sock(net, sock, &udp_cfg);
}
- tunnel->old_sk_destruct = sk->sk_destruct;
- sk->sk_destruct = &l2tp_tunnel_destruct;
sk->sk_allocation = GFP_ATOMIC;
release_sock(sk);
@@ -1639,7 +1717,7 @@ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
if (!test_and_set_bit(0, &tunnel->dead)) {
trace_delete_tunnel(tunnel);
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
queue_work(l2tp_wq, &tunnel->del_work);
}
}
@@ -1647,23 +1725,37 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
void l2tp_session_delete(struct l2tp_session *session)
{
- if (test_and_set_bit(0, &session->dead))
- return;
+ if (!test_and_set_bit(0, &session->dead)) {
+ trace_delete_session(session);
+ refcount_inc(&session->ref_count);
+ queue_work(l2tp_wq, &session->del_work);
+ }
+}
+EXPORT_SYMBOL_GPL(l2tp_session_delete);
+
+/* Workqueue session deletion function */
+static void l2tp_session_del_work(struct work_struct *work)
+{
+ struct l2tp_session *session = container_of(work, struct l2tp_session,
+ del_work);
- trace_delete_session(session);
l2tp_session_unhash(session);
l2tp_session_queue_purge(session);
if (session->session_close)
(*session->session_close)(session);
- l2tp_session_dec_refcount(session);
+ /* drop initial ref */
+ l2tp_session_put(session);
+
+ /* drop workqueue ref */
+ l2tp_session_put(session);
}
-EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_type parameters are set.
*/
-void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+void l2tp_session_set_header_len(struct l2tp_session *session, int version,
+ enum l2tp_encap_type encap)
{
if (version == L2TP_HDR_VER_2) {
session->hdr_len = 6;
@@ -1672,7 +1764,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
} else {
session->hdr_len = 4 + session->cookie_len;
session->hdr_len += l2tp_get_l2specific_len(session);
- if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
+ if (encap == L2TP_ENCAPTYPE_UDP)
session->hdr_len += 4;
}
}
@@ -1686,7 +1778,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL);
if (session) {
session->magic = L2TP_SESSION_MAGIC;
- session->tunnel = tunnel;
session->session_id = session_id;
session->peer_session_id = peer_session_id;
@@ -1710,6 +1801,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
INIT_HLIST_NODE(&session->hlist);
INIT_LIST_HEAD(&session->clist);
INIT_LIST_HEAD(&session->list);
+ INIT_WORK(&session->del_work, l2tp_session_del_work);
if (cfg) {
session->pwtype = cfg->pw_type;
@@ -1724,7 +1816,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
}
- l2tp_session_set_header_len(session, tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version, tunnel->encap);
refcount_set(&session->ref_count, 1);
@@ -1753,7 +1845,7 @@ static __net_init int l2tp_init_net(struct net *net)
return 0;
}
-static __net_exit void l2tp_exit_net(struct net *net)
+static __net_exit void l2tp_pre_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
@@ -1766,18 +1858,34 @@ static __net_exit void l2tp_exit_net(struct net *net)
}
rcu_read_unlock_bh();
- if (l2tp_wq)
- flush_workqueue(l2tp_wq);
- rcu_barrier();
+ if (l2tp_wq) {
+ /* Run all TUNNEL_DELETE work items just queued. */
+ __flush_workqueue(l2tp_wq);
+
+ /* Each TUNNEL_DELETE work item will queue a SESSION_DELETE
+ * work item for each session in the tunnel. Flush the
+ * workqueue again to process these.
+ */
+ __flush_workqueue(l2tp_wq);
+ }
+}
+
+static __net_exit void l2tp_exit_net(struct net *net)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+ WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v2_session_idr));
idr_destroy(&pn->l2tp_v2_session_idr);
+ WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v3_session_idr));
idr_destroy(&pn->l2tp_v3_session_idr);
+ WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_tunnel_idr));
idr_destroy(&pn->l2tp_tunnel_idr);
}
static struct pernet_operations l2tp_net_ops = {
.init = l2tp_init_net,
.exit = l2tp_exit_net,
+ .pre_exit = l2tp_pre_exit_net,
.id = &l2tp_net_id,
.size = sizeof(struct l2tp_net),
};
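The .pre_exit hook runs while the namespace and its sockets are still functional, which is what lets the two workqueue flushes above complete the deferred TUNNEL_DELETE and SESSION_DELETE work; by the time .exit runs, the IDRs should be empty. A minimal skeleton of the split (all demo_* names hypothetical):

/* Sketch: teardown split across pre_exit (drain) and exit (free). */
static unsigned int demo_net_id;

struct demo_net {
	struct idr objects;
};

static __net_init int demo_init_net(struct net *net)
{
	struct demo_net *pn = net_generic(net, demo_net_id);

	idr_init(&pn->objects);
	return 0;
}

static __net_exit void demo_pre_exit_net(struct net *net)
{
	/* kick and flush deferred teardown while sockets still work */
}

static __net_exit void demo_exit_net(struct net *net)
{
	struct demo_net *pn = net_generic(net, demo_net_id);

	WARN_ON_ONCE(!idr_is_empty(&pn->objects));
	idr_destroy(&pn->objects);
}

static struct pernet_operations demo_net_ops = {
	.init		= demo_init_net,
	.pre_exit	= demo_pre_exit_net,
	.exit		= demo_exit_net,
	.id		= &demo_net_id,
	.size		= sizeof(struct demo_net),
};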
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 8ac81bc1bc6f..ffd8ced3a51f 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -16,7 +16,6 @@
#endif
/* Random numbers used for internal consistency checks of tunnel and session structures */
-#define L2TP_TUNNEL_MAGIC 0x42114DDA
#define L2TP_SESSION_MAGIC 0x0C04EB7D
struct sk_buff;
@@ -67,6 +66,7 @@ struct l2tp_session_coll_list {
struct l2tp_session {
int magic; /* should be L2TP_SESSION_MAGIC */
long dead;
+ struct rcu_head rcu;
struct l2tp_tunnel *tunnel; /* back pointer to tunnel context */
u32 session_id;
@@ -103,6 +103,7 @@ struct l2tp_session {
int reorder_skip; /* set if skip to next nr */
enum l2tp_pwtype pwtype;
struct l2tp_stats stats;
+ struct work_struct del_work;
/* Session receive handler for data packets.
* Each pseudowire implementation should implement this callback in order to
@@ -155,8 +156,6 @@ struct l2tp_tunnel_cfg {
*/
#define L2TP_TUNNEL_NAME_MAX 20
struct l2tp_tunnel {
- int magic; /* Should be L2TP_TUNNEL_MAGIC */
-
unsigned long dead;
struct rcu_head rcu;
@@ -176,7 +175,6 @@ struct l2tp_tunnel {
struct net *l2tp_net; /* the net we belong to */
refcount_t ref_count;
- void (*old_sk_destruct)(struct sock *sk);
struct sock *sock; /* parent socket */
int fd; /* parent fd, if tunnel socket was created
* by userspace
@@ -211,23 +209,22 @@ static inline void *l2tp_session_priv(struct l2tp_session *session)
}
/* Tunnel and session refcounts */
-void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel);
-void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel);
-void l2tp_session_inc_refcount(struct l2tp_session *session);
-void l2tp_session_dec_refcount(struct l2tp_session *session);
+void l2tp_tunnel_put(struct l2tp_tunnel *tunnel);
+void l2tp_session_put(struct l2tp_session *session);
/* Tunnel and session lookup.
* These functions take a reference on the instances they return, so
* the caller must ensure that the reference is dropped appropriately.
*/
struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
-struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth);
+struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key);
struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id);
struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id);
struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver,
u32 tunnel_id, u32 session_id);
-struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
+struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver,
+ u32 tunnel_id, unsigned long *key);
struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
const char *ifname);
@@ -260,7 +257,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
/* Transmit path helpers for sending packets over the tunnel socket. */
-void l2tp_session_set_header_len(struct l2tp_session *session, int version);
+void l2tp_session_set_header_len(struct l2tp_session *session, int version,
+ enum l2tp_encap_type encap);
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb);
/* Pseudowire management.
@@ -273,10 +271,7 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
/* IOCTL helper for IP encap modules. */
int l2tp_ioctl(struct sock *sk, int cmd, int *karg);
-/* Extract the tunnel structure from a socket's sk_user_data pointer,
- * validating the tunnel magic feather.
- */
-struct l2tp_tunnel *l2tp_sk_to_tunnel(struct sock *sk);
+struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk);
static inline int l2tp_get_l2specific_len(struct l2tp_session *session)
{
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 8755ae521154..2d0c8275a3a8 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -34,8 +34,8 @@ static struct dentry *rootdir;
struct l2tp_dfs_seq_data {
struct net *net;
netns_tracker ns_tracker;
- int tunnel_idx; /* current tunnel */
- int session_idx; /* index of session within current tunnel */
+ unsigned long tkey; /* lookup key of current tunnel */
+ unsigned long skey; /* lookup key of current session */
struct l2tp_tunnel *tunnel;
struct l2tp_session *session; /* NULL means get next tunnel */
};
@@ -44,23 +44,25 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->tunnel)
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
- pd->tunnel = l2tp_tunnel_get_nth(pd->net, pd->tunnel_idx);
- pd->tunnel_idx++;
+ pd->tunnel = l2tp_tunnel_get_next(pd->net, &pd->tkey);
+ pd->tkey++;
}
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->session)
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
- pd->session_idx++;
+ pd->session = l2tp_session_get_next(pd->net, pd->tunnel->sock,
+ pd->tunnel->version,
+ pd->tunnel->tunnel_id, &pd->skey);
+ pd->skey++;
if (!pd->session) {
- pd->session_idx = 0;
+ pd->skey = 0;
l2tp_dfs_next_tunnel(pd);
}
}
@@ -109,11 +111,11 @@ static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
* or l2tp_dfs_next_tunnel().
*/
if (pd->session) {
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
pd->session = NULL;
}
if (pd->tunnel) {
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
pd->tunnel = NULL;
}
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8ba00ad433c2..d692b902e120 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -72,31 +72,19 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev
unsigned int len = skb->len;
int ret = l2tp_xmit_skb(session, skb);
- if (likely(ret == NET_XMIT_SUCCESS)) {
- DEV_STATS_ADD(dev, tx_bytes, len);
- DEV_STATS_INC(dev, tx_packets);
- } else {
+ if (likely(ret == NET_XMIT_SUCCESS))
+ dev_sw_netstats_tx_add(dev, 1, len);
+ else
DEV_STATS_INC(dev, tx_dropped);
- }
- return NETDEV_TX_OK;
-}
-static void l2tp_eth_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *stats)
-{
- stats->tx_bytes = DEV_STATS_READ(dev, tx_bytes);
- stats->tx_packets = DEV_STATS_READ(dev, tx_packets);
- stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped);
- stats->rx_bytes = DEV_STATS_READ(dev, rx_bytes);
- stats->rx_packets = DEV_STATS_READ(dev, rx_packets);
- stats->rx_errors = DEV_STATS_READ(dev, rx_errors);
+ return NETDEV_TX_OK;
}
static const struct net_device_ops l2tp_eth_netdev_ops = {
.ndo_init = l2tp_eth_dev_init,
.ndo_uninit = l2tp_eth_dev_uninit,
.ndo_start_xmit = l2tp_eth_dev_xmit,
- .ndo_get_stats64 = l2tp_eth_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_set_mac_address = eth_mac_addr,
};
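Setting dev->pcpu_stat_type to NETDEV_PCPU_STAT_TSTATS (in the setup hunk below) makes the core allocate and free dev->tstats, and dev_get_tstats64() aggregates the per-CPU counters, so the driver only bumps them on the fast path. A condensed sketch (demo_xmit/demo_send are hypothetical):

/* Sketch: core-managed per-CPU tstats on the transmit path. */
static netdev_tx_t demo_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int len = skb->len;

	if (demo_send(skb) == NET_XMIT_SUCCESS)
		dev_sw_netstats_tx_add(dev, 1, len);	/* 1 pkt, len bytes */
	else
		DEV_STATS_INC(dev, tx_dropped);

	return NETDEV_TX_OK;
}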
@@ -109,9 +97,10 @@ static void l2tp_eth_dev_setup(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &l2tpeth_type);
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->netdev_ops = &l2tp_eth_netdev_ops;
dev->needs_free_netdev = true;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
}
static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
@@ -138,12 +127,11 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
if (!dev)
goto error_rcu;
- if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
- DEV_STATS_INC(dev, rx_packets);
- DEV_STATS_ADD(dev, rx_bytes, data_len);
- } else {
+ if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS)
+ dev_sw_netstats_rx_add(dev, data_len);
+ else
DEV_STATS_INC(dev, rx_errors);
- }
+
rcu_read_unlock();
return;
@@ -283,7 +271,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
spriv = l2tp_session_priv(session);
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
rtnl_lock();
@@ -301,7 +289,7 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
if (rc < 0) {
rtnl_unlock();
l2tp_session_delete(session);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
free_netdev(dev);
return rc;
@@ -312,17 +300,17 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel,
rtnl_unlock();
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
__module_get(THIS_MODULE);
return 0;
err_sess_dev:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
free_netdev(dev);
err_sess:
- kfree(session);
+ l2tp_session_put(session);
err:
return rc;
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index e48aa177d74c..4bc24fddfd52 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -22,9 +22,19 @@
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include "l2tp_core.h"
+/* per-net private data for this module */
+static unsigned int l2tp_ip_net_id;
+struct l2tp_ip_net {
+ rwlock_t l2tp_ip_lock;
+ struct hlist_head l2tp_ip_table;
+ struct hlist_head l2tp_ip_bind_table;
+};
+
struct l2tp_ip_sock {
/* inet_sock has to be the first member of l2tp_ip_sock */
struct inet_sock inet;
@@ -33,21 +43,23 @@ struct l2tp_ip_sock {
u32 peer_conn_id;
};
-static DEFINE_RWLOCK(l2tp_ip_lock);
-static struct hlist_head l2tp_ip_table;
-static struct hlist_head l2tp_ip_bind_table;
-
-static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
+static struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
{
return (struct l2tp_ip_sock *)sk;
}
+static struct l2tp_ip_net *l2tp_ip_pernet(const struct net *net)
+{
+ return net_generic(net, l2tp_ip_net_id);
+}
+
static struct sock *__l2tp_ip_bind_lookup(const struct net *net, __be32 laddr,
__be32 raddr, int dif, u32 tunnel_id)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(net);
struct sock *sk;
- sk_for_each_bound(sk, &l2tp_ip_bind_table) {
+ sk_for_each_bound(sk, &pn->l2tp_ip_bind_table) {
const struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
int bound_dev_if;
@@ -113,6 +125,7 @@ found:
static int l2tp_ip_recv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct l2tp_ip_net *pn;
struct sock *sk;
u32 session_id;
u32 tunnel_id;
@@ -121,6 +134,8 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
struct iphdr *iph;
+ pn = l2tp_ip_pernet(net);
+
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -152,7 +167,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
goto discard_sess;
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -167,15 +182,15 @@ pass_up:
tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = (struct iphdr *)skb_network_header(skb);
- read_lock_bh(&l2tp_ip_lock);
+ read_lock_bh(&pn->l2tp_ip_lock);
sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb),
tunnel_id);
if (!sk) {
- read_unlock_bh(&l2tp_ip_lock);
+ read_unlock_bh(&pn->l2tp_ip_lock);
goto discard;
}
sock_hold(sk);
- read_unlock_bh(&l2tp_ip_lock);
+ read_unlock_bh(&pn->l2tp_ip_lock);
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -185,7 +200,7 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto discard;
discard_put:
@@ -198,21 +213,25 @@ discard:
static int l2tp_ip_hash(struct sock *sk)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
if (sk_unhashed(sk)) {
- write_lock_bh(&l2tp_ip_lock);
- sk_add_node(sk, &l2tp_ip_table);
- write_unlock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
+ sk_add_node(sk, &pn->l2tp_ip_table);
+ write_unlock_bh(&pn->l2tp_ip_lock);
}
return 0;
}
static void l2tp_ip_unhash(struct sock *sk)
{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
if (sk_unhashed(sk))
return;
- write_lock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
}
static int l2tp_ip_open(struct sock *sk)
@@ -226,23 +245,26 @@ static int l2tp_ip_open(struct sock *sk)
static void l2tp_ip_close(struct sock *sk, long timeout)
{
- write_lock_bh(&l2tp_ip_lock);
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
+
+ write_lock_bh(&pn->l2tp_ip_lock);
hlist_del_init(&sk->sk_bind_node);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
sk_common_release(sk);
}
static void l2tp_ip_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
- struct sk_buff *skb;
+ struct l2tp_tunnel *tunnel;
- while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
- kfree_skb(skb);
+ __skb_queue_purge(&sk->sk_write_queue);
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -250,6 +272,7 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *)uaddr;
struct net *net = sock_net(sk);
+ struct l2tp_ip_net *pn;
int ret;
int chk_addr_ret;
@@ -280,10 +303,11 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->inet_saddr = 0; /* Use device */
- write_lock_bh(&l2tp_ip_lock);
+ pn = l2tp_ip_pernet(net);
+ write_lock_bh(&pn->l2tp_ip_lock);
if (__l2tp_ip_bind_lookup(net, addr->l2tp_addr.s_addr, 0,
sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
ret = -EADDRINUSE;
goto out;
}
@@ -291,9 +315,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
- sk_add_bind_node(sk, &l2tp_ip_bind_table);
+ sk_add_bind_node(sk, &pn->l2tp_ip_bind_table);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip_lock);
+ write_unlock_bh(&pn->l2tp_ip_lock);
ret = 0;
sock_reset_flag(sk, SOCK_ZAPPED);
@@ -307,6 +331,7 @@ out:
static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(sock_net(sk));
int rc;
if (addr_len < sizeof(*lsa))
@@ -329,10 +354,10 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
- write_lock_bh(&l2tp_ip_lock);
+ write_lock_bh(&pn->l2tp_ip_lock);
hlist_del_init(&sk->sk_bind_node);
- sk_add_bind_node(sk, &l2tp_ip_bind_table);
- write_unlock_bh(&l2tp_ip_lock);
+ sk_add_bind_node(sk, &pn->l2tp_ip_bind_table);
+ write_unlock_bh(&pn->l2tp_ip_lock);
out_sk:
release_sock(sk);
@@ -637,25 +662,58 @@ static struct net_protocol l2tp_ip_protocol __read_mostly = {
.handler = l2tp_ip_recv,
};
+static __net_init int l2tp_ip_init_net(struct net *net)
+{
+ struct l2tp_ip_net *pn = net_generic(net, l2tp_ip_net_id);
+
+ rwlock_init(&pn->l2tp_ip_lock);
+ INIT_HLIST_HEAD(&pn->l2tp_ip_table);
+ INIT_HLIST_HEAD(&pn->l2tp_ip_bind_table);
+ return 0;
+}
+
+static __net_exit void l2tp_ip_exit_net(struct net *net)
+{
+ struct l2tp_ip_net *pn = l2tp_ip_pernet(net);
+
+ write_lock_bh(&pn->l2tp_ip_lock);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_table) != 0);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip_bind_table) != 0);
+ write_unlock_bh(&pn->l2tp_ip_lock);
+}
+
+static struct pernet_operations l2tp_ip_net_ops = {
+ .init = l2tp_ip_init_net,
+ .exit = l2tp_ip_exit_net,
+ .id = &l2tp_ip_net_id,
+ .size = sizeof(struct l2tp_ip_net),
+};
+
static int __init l2tp_ip_init(void)
{
int err;
pr_info("L2TP IP encapsulation support (L2TPv3)\n");
+ err = register_pernet_device(&l2tp_ip_net_ops);
+ if (err)
+ goto out;
+
err = proto_register(&l2tp_ip_prot, 1);
if (err != 0)
- goto out;
+ goto out1;
err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
if (err)
- goto out1;
+ goto out2;
inet_register_protosw(&l2tp_ip_protosw);
return 0;
-out1:
+out2:
proto_unregister(&l2tp_ip_prot);
+out1:
+ unregister_pernet_device(&l2tp_ip_net_ops);
out:
return err;
}
@@ -665,6 +723,7 @@ static void __exit l2tp_ip_exit(void)
inet_unregister_protosw(&l2tp_ip_protosw);
inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
proto_unregister(&l2tp_ip_prot);
+ unregister_pernet_device(&l2tp_ip_net_ops);
}
module_init(l2tp_ip_init);
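Note the unwind ordering in the reworked init path: the pernet ops are registered first and unregistered last, so the per-net hash tables exist for as long as the protocol handlers can run. The same shape applies to l2tp_ip6 below; a reduced sketch (demo_* names hypothetical):

/* Sketch: pernet registration brackets the protocol registration. */
static int __init demo_module_init(void)
{
	int err;

	err = register_pernet_device(&demo_net_ops);
	if (err)
		return err;

	err = proto_register(&demo_prot, 1);
	if (err)
		unregister_pernet_device(&demo_net_ops);

	return err;
}

static void __exit demo_module_exit(void)
{
	proto_unregister(&demo_prot);
	unregister_pernet_device(&demo_net_ops);
}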
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d217ff1f229e..f4c1da070826 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -22,6 +22,8 @@
#include <net/tcp_states.h>
#include <net/protocol.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
@@ -29,6 +31,14 @@
#include "l2tp_core.h"
+/* per-net private data for this module */
+static unsigned int l2tp_ip6_net_id;
+struct l2tp_ip6_net {
+ rwlock_t l2tp_ip6_lock;
+ struct hlist_head l2tp_ip6_table;
+ struct hlist_head l2tp_ip6_bind_table;
+};
+
struct l2tp_ip6_sock {
/* inet_sock has to be the first member of l2tp_ip6_sock */
struct inet_sock inet;
@@ -39,23 +49,25 @@ struct l2tp_ip6_sock {
struct ipv6_pinfo inet6;
};
-static DEFINE_RWLOCK(l2tp_ip6_lock);
-static struct hlist_head l2tp_ip6_table;
-static struct hlist_head l2tp_ip6_bind_table;
-
-static inline struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
+static struct l2tp_ip6_sock *l2tp_ip6_sk(const struct sock *sk)
{
return (struct l2tp_ip6_sock *)sk;
}
+static struct l2tp_ip6_net *l2tp_ip6_pernet(const struct net *net)
+{
+ return net_generic(net, l2tp_ip6_net_id);
+}
+
static struct sock *__l2tp_ip6_bind_lookup(const struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *raddr,
int dif, u32 tunnel_id)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(net);
struct sock *sk;
- sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
+ sk_for_each_bound(sk, &pn->l2tp_ip6_bind_table) {
const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
const struct in6_addr *sk_raddr = &sk->sk_v6_daddr;
const struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
@@ -123,6 +135,7 @@ found:
static int l2tp_ip6_recv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
+ struct l2tp_ip6_net *pn;
struct sock *sk;
u32 session_id;
u32 tunnel_id;
@@ -131,6 +144,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
struct ipv6hdr *iph;
+ pn = l2tp_ip6_pernet(net);
+
if (!pskb_may_pull(skb, 4))
goto discard;
@@ -162,7 +177,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
goto discard_sess;
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
@@ -177,15 +192,15 @@ pass_up:
tunnel_id = ntohl(*(__be32 *)&skb->data[4]);
iph = ipv6_hdr(skb);
- read_lock_bh(&l2tp_ip6_lock);
+ read_lock_bh(&pn->l2tp_ip6_lock);
sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr,
inet6_iif(skb), tunnel_id);
if (!sk) {
- read_unlock_bh(&l2tp_ip6_lock);
+ read_unlock_bh(&pn->l2tp_ip6_lock);
goto discard;
}
sock_hold(sk);
- read_unlock_bh(&l2tp_ip6_lock);
+ read_unlock_bh(&pn->l2tp_ip6_lock);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_put;
@@ -195,7 +210,7 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
discard_sess:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
goto discard;
discard_put:
@@ -208,21 +223,25 @@ discard:
static int l2tp_ip6_hash(struct sock *sk)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
if (sk_unhashed(sk)) {
- write_lock_bh(&l2tp_ip6_lock);
- sk_add_node(sk, &l2tp_ip6_table);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
+ sk_add_node(sk, &pn->l2tp_ip6_table);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
}
return 0;
}
static void l2tp_ip6_unhash(struct sock *sk)
{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
if (sk_unhashed(sk))
return;
- write_lock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
}
static int l2tp_ip6_open(struct sock *sk)
@@ -236,24 +255,29 @@ static int l2tp_ip6_open(struct sock *sk)
static void l2tp_ip6_close(struct sock *sk, long timeout)
{
- write_lock_bh(&l2tp_ip6_lock);
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(sock_net(sk));
+
+ write_lock_bh(&pn->l2tp_ip6_lock);
hlist_del_init(&sk->sk_bind_node);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
sk_common_release(sk);
}
static void l2tp_ip6_destroy_sock(struct sock *sk)
{
- struct l2tp_tunnel *tunnel = l2tp_sk_to_tunnel(sk);
+ struct l2tp_tunnel *tunnel;
lock_sock(sk);
ip6_flush_pending_frames(sk);
release_sock(sk);
- if (tunnel)
+ tunnel = l2tp_sk_to_tunnel(sk);
+ if (tunnel) {
l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_put(tunnel);
+ }
}
static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -262,11 +286,14 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
struct ipv6_pinfo *np = inet6_sk(sk);
struct sockaddr_l2tpip6 *addr = (struct sockaddr_l2tpip6 *)uaddr;
struct net *net = sock_net(sk);
+ struct l2tp_ip6_net *pn;
__be32 v4addr = 0;
int bound_dev_if;
int addr_type;
int err;
+ pn = l2tp_ip6_pernet(net);
+
if (addr->l2tp_family != AF_INET6)
return -EINVAL;
if (addr_len < sizeof(*addr))
@@ -324,10 +351,10 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
}
rcu_read_unlock();
- write_lock_bh(&l2tp_ip6_lock);
+ write_lock_bh(&pn->l2tp_ip6_lock);
if (__l2tp_ip6_bind_lookup(net, &addr->l2tp_addr, NULL, bound_dev_if,
addr->l2tp_conn_id)) {
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
err = -EADDRINUSE;
goto out_unlock;
}
@@ -340,9 +367,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
l2tp_ip6_sk(sk)->conn_id = addr->l2tp_conn_id;
- sk_add_bind_node(sk, &l2tp_ip6_bind_table);
+ sk_add_bind_node(sk, &pn->l2tp_ip6_bind_table);
sk_del_node_init(sk);
- write_unlock_bh(&l2tp_ip6_lock);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
sock_reset_flag(sk, SOCK_ZAPPED);
release_sock(sk);
@@ -364,6 +391,7 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
struct in6_addr *daddr;
int addr_type;
int rc;
+ struct l2tp_ip6_net *pn;
if (addr_len < sizeof(*lsa))
return -EINVAL;
@@ -395,10 +423,11 @@ static int l2tp_ip6_connect(struct sock *sk, struct sockaddr *uaddr,
l2tp_ip6_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
- write_lock_bh(&l2tp_ip6_lock);
+ pn = l2tp_ip6_pernet(sock_net(sk));
+ write_lock_bh(&pn->l2tp_ip6_lock);
hlist_del_init(&sk->sk_bind_node);
- sk_add_bind_node(sk, &l2tp_ip6_bind_table);
- write_unlock_bh(&l2tp_ip6_lock);
+ sk_add_bind_node(sk, &pn->l2tp_ip6_bind_table);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
out_sk:
release_sock(sk);
@@ -765,25 +794,58 @@ static struct inet6_protocol l2tp_ip6_protocol __read_mostly = {
.handler = l2tp_ip6_recv,
};
+static __net_init int l2tp_ip6_init_net(struct net *net)
+{
+ struct l2tp_ip6_net *pn = net_generic(net, l2tp_ip6_net_id);
+
+ rwlock_init(&pn->l2tp_ip6_lock);
+ INIT_HLIST_HEAD(&pn->l2tp_ip6_table);
+ INIT_HLIST_HEAD(&pn->l2tp_ip6_bind_table);
+ return 0;
+}
+
+static __net_exit void l2tp_ip6_exit_net(struct net *net)
+{
+ struct l2tp_ip6_net *pn = l2tp_ip6_pernet(net);
+
+ write_lock_bh(&pn->l2tp_ip6_lock);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip6_table) != 0);
+ WARN_ON_ONCE(hlist_count_nodes(&pn->l2tp_ip6_bind_table) != 0);
+ write_unlock_bh(&pn->l2tp_ip6_lock);
+}
+
+static struct pernet_operations l2tp_ip6_net_ops = {
+ .init = l2tp_ip6_init_net,
+ .exit = l2tp_ip6_exit_net,
+ .id = &l2tp_ip6_net_id,
+ .size = sizeof(struct l2tp_ip6_net),
+};
+
static int __init l2tp_ip6_init(void)
{
int err;
pr_info("L2TP IP encapsulation support for IPv6 (L2TPv3)\n");
+ err = register_pernet_device(&l2tp_ip6_net_ops);
+ if (err)
+ goto out;
+
err = proto_register(&l2tp_ip6_prot, 1);
if (err != 0)
- goto out;
+ goto out1;
err = inet6_add_protocol(&l2tp_ip6_protocol, IPPROTO_L2TP);
if (err)
- goto out1;
+ goto out2;
inet6_register_protosw(&l2tp_ip6_protosw);
return 0;
-out1:
+out2:
proto_unregister(&l2tp_ip6_prot);
+out1:
+ unregister_pernet_device(&l2tp_ip6_net_ops);
out:
return err;
}
@@ -793,6 +855,7 @@ static void __exit l2tp_ip6_exit(void)
inet6_unregister_protosw(&l2tp_ip6_protosw);
inet6_del_protocol(&l2tp_ip6_protocol, IPPROTO_L2TP);
proto_unregister(&l2tp_ip6_prot);
+ unregister_pernet_device(&l2tp_ip6_net_ops);
}
module_init(l2tp_ip6_init);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index d105030520f9..284f1dec1b56 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -63,7 +63,7 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info)
if (tunnel) {
session = l2tp_session_get(net, tunnel->sock, tunnel->version,
tunnel_id, session_id);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
}
}
@@ -242,7 +242,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
if (ret < 0)
goto out;
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
ret = l2tp_tunnel_register(tunnel, net, &cfg);
if (ret < 0) {
kfree(tunnel);
@@ -250,7 +250,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
}
ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel,
L2TP_CMD_TUNNEL_CREATE);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -280,7 +280,7 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
l2tp_tunnel_delete(tunnel);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -308,7 +308,7 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_MODIFY);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
@@ -479,42 +479,48 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
goto err_nlmsg_tunnel;
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
return genlmsg_unicast(net, msg, info->snd_portid);
err_nlmsg_tunnel:
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
err_nlmsg:
nlmsg_free(msg);
err:
return ret;
}
+struct l2tp_nl_cb_data {
+ unsigned long tkey;
+ unsigned long skey;
+};
+
static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
- int ti = cb->args[0];
+ struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0];
+ unsigned long key = cbd->tkey;
struct l2tp_tunnel *tunnel;
struct net *net = sock_net(skb->sk);
for (;;) {
- tunnel = l2tp_tunnel_get_nth(net, ti);
+ tunnel = l2tp_tunnel_get_next(net, &key);
if (!tunnel)
goto out;
if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
tunnel, L2TP_CMD_TUNNEL_GET) < 0) {
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
goto out;
}
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
- ti++;
+ key++;
}
out:
- cb->args[0] = ti;
+ cbd->tkey = key;
return skb->len;
}
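Moving the dump cursors from untyped cb->args slots into a struct overlaid on cb->ctx is the current netlink idiom: the ctx area is zeroed when the dump starts, and the struct gives the two cursors names and proper types. A sketch of the pattern (demo_* names hypothetical):

/* Sketch: typed dump state carried across netlink dump invocations. */
struct demo_dump_ctx {
	unsigned long tkey;
	unsigned long skey;
};

static int demo_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct demo_dump_ctx *ctx = (void *)cb->ctx;

	BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));

	/* resume from ctx->tkey / ctx->skey, store them back on return */
	return skb->len;
}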
@@ -641,12 +647,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
}
}
out_tunnel:
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
out:
return ret;
}
@@ -671,7 +677,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
l2tp_nl_cmd_ops[pw_type]->session_delete(session);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
out:
return ret;
@@ -692,8 +698,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
if (info->attrs[L2TP_ATTR_SEND_SEQ]) {
+ struct l2tp_tunnel *tunnel = session->tunnel;
+
session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
- l2tp_session_set_header_len(session, session->tunnel->version);
+ l2tp_session_set_header_len(session, tunnel->version, tunnel->encap);
}
if (info->attrs[L2TP_ATTR_LNS_MODE])
@@ -705,7 +713,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
ret = l2tp_session_notify(&l2tp_nl_family, info,
session, L2TP_CMD_SESSION_MODIFY);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
out:
return ret;
@@ -816,57 +824,59 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return ret;
err_ref_msg:
nlmsg_free(msg);
err_ref:
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
err:
return ret;
}
static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct l2tp_nl_cb_data *cbd = (void *)&cb->ctx[0];
struct net *net = sock_net(skb->sk);
struct l2tp_session *session;
struct l2tp_tunnel *tunnel = NULL;
- int ti = cb->args[0];
- int si = cb->args[1];
+ unsigned long tkey = cbd->tkey;
+ unsigned long skey = cbd->skey;
for (;;) {
if (!tunnel) {
- tunnel = l2tp_tunnel_get_nth(net, ti);
+ tunnel = l2tp_tunnel_get_next(net, &tkey);
if (!tunnel)
goto out;
}
- session = l2tp_session_get_nth(tunnel, si);
+ session = l2tp_session_get_next(net, tunnel->sock, tunnel->version,
+ tunnel->tunnel_id, &skey);
if (!session) {
- ti++;
- l2tp_tunnel_dec_refcount(tunnel);
+ tkey++;
+ l2tp_tunnel_put(tunnel);
tunnel = NULL;
- si = 0;
+ skey = 0;
continue;
}
if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
session, L2TP_CMD_SESSION_GET) < 0) {
- l2tp_session_dec_refcount(session);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_session_put(session);
+ l2tp_tunnel_put(tunnel);
break;
}
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
- si++;
+ skey++;
}
out:
- cb->args[0] = ti;
- cb->args[1] = si;
+ cbd->tkey = tkey;
+ cbd->skey = skey;
return skb->len;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 3596290047b2..53baf2dd5d5d 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -119,7 +119,6 @@ struct pppol2tp_session {
struct mutex sk_lock; /* Protects .sk */
struct sock __rcu *sk; /* Pointer to the session PPPoX socket */
struct sock *__sk; /* Copy of .sk, for cleanup */
- struct rcu_head rcu; /* For asynchronous release */
};
static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
@@ -150,27 +149,23 @@ static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session)
/* Helpers to obtain tunnel/session contexts from sockets.
*/
-static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
+static struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
{
struct l2tp_session *session;
if (!sk)
return NULL;
- sock_hold(sk);
- session = (struct l2tp_session *)(sk->sk_user_data);
- if (!session) {
- sock_put(sk);
- goto out;
- }
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC)) {
- session = NULL;
- sock_put(sk);
- goto out;
+ rcu_read_lock();
+ session = rcu_dereference_sk_user_data(sk);
+ if (session && refcount_inc_not_zero(&session->ref_count)) {
+ rcu_read_unlock();
+ WARN_ON_ONCE(session->magic != L2TP_SESSION_MAGIC);
+ return session;
}
+ rcu_read_unlock();
-out:
- return session;
+ return NULL;
}
/*****************************************************************************
@@ -318,12 +313,12 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
l2tp_xmit_skb(session, skb);
local_bh_enable();
- sock_put(sk);
+ l2tp_session_put(session);
return total_len;
error_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
error:
return error;
}
@@ -377,12 +372,12 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
l2tp_xmit_skb(session, skb);
local_bh_enable();
- sock_put(sk);
+ l2tp_session_put(session);
return 1;
abort_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
abort:
/* Free the original skb */
kfree_skb(skb);
@@ -393,29 +388,32 @@ abort:
* Session (and tunnel control) socket create/destroy.
*****************************************************************************/
-static void pppol2tp_put_sk(struct rcu_head *head)
-{
- struct pppol2tp_session *ps;
-
- ps = container_of(head, typeof(*ps), rcu);
- sock_put(ps->__sk);
-}
-
/* Really kill the session socket. (Called from sock_put() if
* refcnt == 0.)
*/
static void pppol2tp_session_destruct(struct sock *sk)
{
- struct l2tp_session *session = sk->sk_user_data;
-
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_write_queue);
+}
- if (session) {
- sk->sk_user_data = NULL;
- if (WARN_ON(session->magic != L2TP_SESSION_MAGIC))
- return;
- l2tp_session_dec_refcount(session);
+static void pppol2tp_session_close(struct l2tp_session *session)
+{
+ struct pppol2tp_session *ps;
+
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ mutex_unlock(&ps->sk_lock);
+ if (ps->__sk) {
+ /* detach socket */
+ rcu_assign_sk_user_data(ps->__sk, NULL);
+ sock_put(ps->__sk);
+
+ /* drop ref taken when we referenced socket via sk_user_data */
+ l2tp_session_put(session);
}
}
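With SOCK_RCU_FREE set on the PPPoX socket and session_close doing the detach, the attach/detach pairing is symmetric: connect takes a session reference and a sock reference before publishing the pointer, and close withdraws them in reverse. Condensed sketch (demo_* names hypothetical, mirroring the connect and close paths):

/* Sketch: publish/withdraw a session via sk_user_data under RCU. */
static void demo_attach(struct sock *sk, struct l2tp_session *session)
{
	refcount_inc(&session->ref_count);	/* ref owned by sk_user_data */
	sock_hold(sk);
	rcu_assign_sk_user_data(sk, session);
}

static void demo_detach(struct sock *sk, struct l2tp_session *session)
{
	rcu_assign_sk_user_data(sk, NULL);
	sock_put(sk);
	l2tp_session_put(session);	/* drop the sk_user_data ref */
}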
@@ -444,30 +442,13 @@ static int pppol2tp_release(struct socket *sock)
session = pppol2tp_sock_to_session(sk);
if (session) {
- struct pppol2tp_session *ps;
-
l2tp_session_delete(session);
-
- ps = l2tp_session_priv(session);
- mutex_lock(&ps->sk_lock);
- ps->__sk = rcu_dereference_protected(ps->sk,
- lockdep_is_held(&ps->sk_lock));
- RCU_INIT_POINTER(ps->sk, NULL);
- mutex_unlock(&ps->sk_lock);
- call_rcu(&ps->rcu, pppol2tp_put_sk);
-
- /* Rely on the sock_put() call at the end of the function for
- * dropping the reference held by pppol2tp_sock_to_session().
- * The last reference will be dropped by pppol2tp_put_sk().
- */
+ /* drop ref taken by pppol2tp_sock_to_session */
+ l2tp_session_put(session);
}
release_sock(sk);
- /* This will delete the session context via
- * pppol2tp_session_destruct() if the socket's refcnt drops to
- * zero.
- */
sock_put(sk);
return 0;
@@ -506,6 +487,7 @@ static int pppol2tp_create(struct net *net, struct socket *sock, int kern)
goto out;
sock_init_data(sock, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
sock->state = SS_UNCONNECTED;
sock->ops = &pppol2tp_ops;
@@ -542,6 +524,7 @@ static void pppol2tp_session_init(struct l2tp_session *session)
struct pppol2tp_session *ps;
session->recv_skb = pppol2tp_recv;
+ session->session_close = pppol2tp_session_close;
if (IS_ENABLED(CONFIG_L2TP_DEBUGFS))
session->show = pppol2tp_show;
@@ -685,7 +668,7 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
if (error < 0)
return ERR_PTR(error);
- l2tp_tunnel_inc_refcount(tunnel);
+ refcount_inc(&tunnel->ref_count);
error = l2tp_tunnel_register(tunnel, net, &tcfg);
if (error < 0) {
kfree(tunnel);
@@ -701,7 +684,7 @@ static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
/* Error if socket is not prepped */
if (!tunnel->sock) {
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_put(tunnel);
return ERR_PTR(-ENOENT);
}
}
@@ -787,18 +770,20 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
goto end;
}
+ drop_refcnt = true;
+
pppol2tp_session_init(session);
ps = l2tp_session_priv(session);
- l2tp_session_inc_refcount(session);
+ refcount_inc(&session->ref_count);
mutex_lock(&ps->sk_lock);
error = l2tp_session_register(session, tunnel);
if (error < 0) {
mutex_unlock(&ps->sk_lock);
- kfree(session);
+ l2tp_session_put(session);
goto end;
}
- drop_refcnt = true;
+
new_session = true;
}
@@ -830,12 +815,13 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
out_no_ppp:
/* This is how we get the session context from the socket. */
- sk->sk_user_data = session;
+ sock_hold(sk);
+ rcu_assign_sk_user_data(sk, session);
rcu_assign_pointer(ps->sk, sk);
mutex_unlock(&ps->sk_lock);
/* Keep the reference we've grabbed on the session: sk doesn't expect
- * the session to disappear. pppol2tp_session_destruct() is responsible
+ * the session to disappear. pppol2tp_session_close() is responsible
* for dropping it.
*/
drop_refcnt = false;
@@ -850,8 +836,8 @@ end:
l2tp_tunnel_delete(tunnel);
}
if (drop_refcnt)
- l2tp_session_dec_refcount(session);
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_session_put(session);
+ l2tp_tunnel_put(tunnel);
release_sock(sk);
return error;
@@ -891,7 +877,7 @@ static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel,
return 0;
err_sess:
- kfree(session);
+ l2tp_session_put(session);
err:
return error;
}
@@ -1002,7 +988,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
error = len;
- sock_put(sk);
+ l2tp_session_put(session);
end:
return error;
}
@@ -1052,12 +1038,12 @@ static int pppol2tp_tunnel_copy_stats(struct pppol2tp_ioc_stats *stats,
return -EBADR;
if (session->pwtype != L2TP_PWTYPE_PPP) {
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return -EBADR;
}
pppol2tp_copy_stats(stats, &session->stats);
- l2tp_session_dec_refcount(session);
+ l2tp_session_put(session);
return 0;
}
@@ -1205,7 +1191,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
- l2tp_session_set_header_len(session, session->tunnel->version);
+ l2tp_session_set_header_len(session, session->tunnel->version,
+ session->tunnel->encap);
break;
case PPPOL2TP_SO_LNSMODE:
@@ -1274,7 +1261,7 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
err = pppol2tp_session_setsockopt(sk, session, optname, val);
}
- sock_put(sk);
+ l2tp_session_put(session);
end:
return err;
}
@@ -1395,7 +1382,7 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
err = 0;
end_put_sess:
- sock_put(sk);
+ l2tp_session_put(session);
end:
return err;
}
@@ -1406,14 +1393,12 @@ end:
* L2TPv2, we dump only L2TPv2 tunnels and sessions here.
*****************************************************************************/
-static unsigned int pppol2tp_net_id;
-
#ifdef CONFIG_PROC_FS
struct pppol2tp_seq_data {
struct seq_net_private p;
- int tunnel_idx; /* current tunnel */
- int session_idx; /* index of session within current tunnel */
+ unsigned long tkey; /* lookup key of current tunnel */
+ unsigned long skey; /* lookup key of current session */
struct l2tp_tunnel *tunnel;
struct l2tp_session *session; /* NULL means get next tunnel */
};
@@ -1422,17 +1407,17 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->tunnel)
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
for (;;) {
- pd->tunnel = l2tp_tunnel_get_nth(net, pd->tunnel_idx);
- pd->tunnel_idx++;
+ pd->tunnel = l2tp_tunnel_get_next(net, &pd->tkey);
+ pd->tkey++;
/* Only accept L2TPv2 tunnels */
if (!pd->tunnel || pd->tunnel->version == 2)
return;
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
}
}
@@ -1440,13 +1425,15 @@ static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
/* Drop reference taken during previous invocation */
if (pd->session)
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
- pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx);
- pd->session_idx++;
+ pd->session = l2tp_session_get_next(net, pd->tunnel->sock,
+ pd->tunnel->version,
+ pd->tunnel->tunnel_id, &pd->skey);
+ pd->skey++;
if (!pd->session) {
- pd->session_idx = 0;
+ pd->skey = 0;
pppol2tp_next_tunnel(net, pd);
}
}
@@ -1498,11 +1485,11 @@ static void pppol2tp_seq_stop(struct seq_file *p, void *v)
* or pppol2tp_next_tunnel().
*/
if (pd->session) {
- l2tp_session_dec_refcount(pd->session);
+ l2tp_session_put(pd->session);
pd->session = NULL;
}
if (pd->tunnel) {
- l2tp_tunnel_dec_refcount(pd->tunnel);
+ l2tp_tunnel_put(pd->tunnel);
pd->tunnel = NULL;
}
}
@@ -1513,7 +1500,7 @@ static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
seq_printf(m, "\nTUNNEL '%s', %c %d\n",
tunnel->name,
- (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
+ tunnel->sock ? 'Y' : 'N',
refcount_read(&tunnel->ref_count) - 1);
seq_printf(m, " %08x %ld/%ld/%ld %ld/%ld/%ld\n",
0,
@@ -1641,7 +1628,6 @@ static __net_exit void pppol2tp_exit_net(struct net *net)
static struct pernet_operations pppol2tp_net_ops = {
.init = pppol2tp_init_net,
.exit = pppol2tp_exit_net,
- .id = &pppol2tp_net_id,
};
/*****************************************************************************
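The l2tp changes above converge on one get/put discipline: lookup helpers take a reference, every exit path drops it with l2tp_session_put()/l2tp_tunnel_put(), and the final put (rather than a direct kfree() on error paths) is expected to free the object. A minimal, runnable userspace sketch of that discipline, using hypothetical demo_* names rather than the kernel API:

/* Illustrative only: the last put frees the object, so error paths
 * and lookup callers all use the same release function.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_session {
	atomic_int ref_count;
	int id;
};

static struct demo_session *demo_session_create(int id)
{
	struct demo_session *s = malloc(sizeof(*s));

	if (!s)
		return NULL;
	atomic_init(&s->ref_count, 1);	/* creator holds the first ref */
	s->id = id;
	return s;
}

static void demo_session_get(struct demo_session *s)
{
	atomic_fetch_add(&s->ref_count, 1);
}

static void demo_session_put(struct demo_session *s)
{
	/* fetch_sub returns the old value; 1 means this was the last ref */
	if (atomic_fetch_sub(&s->ref_count, 1) == 1)
		free(s);
}

int main(void)
{
	struct demo_session *s = demo_session_create(42);

	demo_session_get(s);	/* e.g. the ref a lookup helper takes */
	demo_session_put(s);	/* drop the ref taken by the lookup */
	demo_session_put(s);	/* drop the creator's ref: frees the object */
	printf("done\n");
	return 0;
}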
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 9bffac7a4974..fe7eab4b681b 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -207,20 +207,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
mgmt->u.action.category = WLAN_CATEGORY_BACK;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 677bbbac9f16..1c18b862ef8c 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -74,20 +74,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index fdf8b658fede..c61df637232a 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -55,10 +55,21 @@
#define HE_DURATION_S(shift, streams, gi, bps) \
(HE_DURATION(streams, gi, bps) >> shift)
+/* The GI values in HE and EHT are identical, and they match enum nl80211_eht_gi as well */
+#define EHT_GI_08 HE_GI_08
+#define EHT_GI_16 HE_GI_16
+#define EHT_GI_32 HE_GI_32
+
+#define EHT_DURATION(streams, gi, bps) \
+ HE_DURATION(streams, gi, bps)
+#define EHT_DURATION_S(shift, streams, gi, bps) \
+ HE_DURATION_S(shift, streams, gi, bps)
+
#define BW_20 0
#define BW_40 1
#define BW_80 2
#define BW_160 3
+#define BW_320 4
/*
* Define group sort order: HT40 -> SGI -> #streams
@@ -68,17 +79,26 @@
#define IEEE80211_VHT_STREAM_GROUPS 8 /* BW(=4) * SGI(=2) */
#define IEEE80211_HE_MAX_STREAMS 8
+#define IEEE80211_HE_STREAM_GROUPS 12 /* BW(=4) * GI(=3) */
+
+#define IEEE80211_EHT_MAX_STREAMS 8
+#define IEEE80211_EHT_STREAM_GROUPS 15 /* BW(=5) * GI(=3) */
#define IEEE80211_HT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_HT_STREAM_GROUPS)
#define IEEE80211_VHT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
IEEE80211_VHT_STREAM_GROUPS)
+#define IEEE80211_HE_GROUPS_NB (IEEE80211_HE_MAX_STREAMS * \
+ IEEE80211_HE_STREAM_GROUPS)
+#define IEEE80211_EHT_GROUPS_NB (IEEE80211_EHT_MAX_STREAMS * \
+ IEEE80211_EHT_STREAM_GROUPS)
#define IEEE80211_HT_GROUP_0 0
#define IEEE80211_VHT_GROUP_0 (IEEE80211_HT_GROUP_0 + IEEE80211_HT_GROUPS_NB)
#define IEEE80211_HE_GROUP_0 (IEEE80211_VHT_GROUP_0 + IEEE80211_VHT_GROUPS_NB)
+#define IEEE80211_EHT_GROUP_0 (IEEE80211_HE_GROUP_0 + IEEE80211_HE_GROUPS_NB)
-#define MCS_GROUP_RATES 12
+#define MCS_GROUP_RATES 14
#define HT_GROUP_IDX(_streams, _sgi, _ht40) \
IEEE80211_HT_GROUP_0 + \
@@ -203,6 +223,69 @@
#define HE_GROUP(_streams, _gi, _bw) \
__HE_GROUP(_streams, _gi, _bw, \
HE_GROUP_SHIFT(_streams, _gi, _bw))
+
+#define EHT_BW2VBPS(_bw, r5, r4, r3, r2, r1) \
+ ((_bw) == BW_320 ? r5 : BW2VBPS(_bw, r4, r3, r2, r1))
+
+#define EHT_GROUP_IDX(_streams, _gi, _bw) \
+ (IEEE80211_EHT_GROUP_0 + \
+ IEEE80211_EHT_MAX_STREAMS * 3 * (_bw) + \
+ IEEE80211_EHT_MAX_STREAMS * (_gi) + \
+ (_streams) - 1)
+
+#define __EHT_GROUP(_streams, _gi, _bw, _s) \
+ [EHT_GROUP_IDX(_streams, _gi, _bw)] = { \
+ .shift = _s, \
+ .duration = { \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 1960, 980, 490, 234, 117)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 3920, 1960, 980, 468, 234)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 5880, 2937, 1470, 702, 351)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 7840, 3920, 1960, 936, 468)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 11760, 5880, 2940, 1404, 702)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 15680, 7840, 3920, 1872, 936)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 17640, 8820, 4410, 2106, 1053)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 19600, 9800, 4900, 2340, 1170)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 23520, 11760, 5880, 2808, 1404)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 26133, 13066, 6533, 3120, 1560)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 29400, 14700, 7350, 3510, 1755)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 32666, 16333, 8166, 3900, 1950)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 35280, 17640, 8820, 4212, 2106)), \
+ EHT_DURATION_S(_s, _streams, _gi, \
+ EHT_BW2VBPS(_bw, 39200, 19600, 9800, 4680, 2340)) \
+ } \
+}
+
+#define EHT_GROUP_SHIFT(_streams, _gi, _bw) \
+ GROUP_SHIFT(EHT_DURATION(_streams, _gi, \
+ EHT_BW2VBPS(_bw, 1960, 980, 490, 234, 117)))
+
+#define EHT_GROUP(_streams, _gi, _bw) \
+ __EHT_GROUP(_streams, _gi, _bw, \
+ EHT_GROUP_SHIFT(_streams, _gi, _bw))
+
+#define EHT_GROUP_RANGE(_gi, _bw) \
+ EHT_GROUP(1, _gi, _bw), \
+ EHT_GROUP(2, _gi, _bw), \
+ EHT_GROUP(3, _gi, _bw), \
+ EHT_GROUP(4, _gi, _bw), \
+ EHT_GROUP(5, _gi, _bw), \
+ EHT_GROUP(6, _gi, _bw), \
+ EHT_GROUP(7, _gi, _bw), \
+ EHT_GROUP(8, _gi, _bw)
+
struct mcs_group {
u8 shift;
u16 duration[MCS_GROUP_RATES];
@@ -376,6 +459,26 @@ static const struct mcs_group airtime_mcs_groups[] = {
HE_GROUP(6, HE_GI_32, BW_160),
HE_GROUP(7, HE_GI_32, BW_160),
HE_GROUP(8, HE_GI_32, BW_160),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_20),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_20),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_20),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_40),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_40),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_40),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_80),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_80),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_80),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_160),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_160),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_160),
+
+ EHT_GROUP_RANGE(EHT_GI_08, BW_320),
+ EHT_GROUP_RANGE(EHT_GI_16, BW_320),
+ EHT_GROUP_RANGE(EHT_GI_32, BW_320),
};
static u32
@@ -422,6 +525,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
case RATE_INFO_BW_160:
bw = BW_160;
break;
+ case RATE_INFO_BW_320:
+ bw = BW_320;
+ break;
default:
WARN_ON_ONCE(1);
return 0;
@@ -443,14 +549,27 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw,
idx = status->rate_idx;
group = HE_GROUP_IDX(streams, status->he_gi, bw);
break;
+ case RX_ENC_EHT:
+ streams = status->nss;
+ idx = status->rate_idx;
+ group = EHT_GROUP_IDX(streams, status->eht.gi, bw);
+ break;
default:
WARN_ON_ONCE(1);
return 0;
}
- if (WARN_ON_ONCE((status->encoding != RX_ENC_HE && streams > 4) ||
- (status->encoding == RX_ENC_HE && streams > 8)))
- return 0;
+ switch (status->encoding) {
+ case RX_ENC_EHT:
+ case RX_ENC_HE:
+ if (WARN_ON_ONCE(streams > 8))
+ return 0;
+ break;
+ default:
+ if (WARN_ON_ONCE(streams > 4))
+ return 0;
+ break;
+ }
if (idx >= MCS_GROUP_RATES)
return 0;
@@ -517,7 +636,9 @@ static bool ieee80211_fill_rate_info(struct ieee80211_hw *hw,
stat->nss = ri->nss;
stat->rate_idx = ri->mcs;
- if (ri->flags & RATE_INFO_FLAGS_HE_MCS)
+ if (ri->flags & RATE_INFO_FLAGS_EHT_MCS)
+ stat->encoding = RX_ENC_EHT;
+ else if (ri->flags & RATE_INFO_FLAGS_HE_MCS)
stat->encoding = RX_ENC_HE;
else if (ri->flags & RATE_INFO_FLAGS_VHT_MCS)
stat->encoding = RX_ENC_VHT;
@@ -529,7 +650,14 @@ static bool ieee80211_fill_rate_info(struct ieee80211_hw *hw,
if (ri->flags & RATE_INFO_FLAGS_SHORT_GI)
stat->enc_flags |= RX_ENC_FLAG_SHORT_GI;
- stat->he_gi = ri->he_gi;
+ switch (stat->encoding) {
+ case RX_ENC_EHT:
+ stat->eht.gi = ri->eht_gi;
+ break;
+ default:
+ stat->he_gi = ri->he_gi;
+ break;
+ }
if (stat->encoding != RX_ENC_LEGACY)
return true;
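The EHT additions above extend the airtime lookup table with 15 new bandwidth/GI groups of 8 streams each, addressed by EHT_GROUP_IDX(). A small runnable sketch of the same indexing arithmetic, with stand-in constants (EHT_GROUP_0 here is simply 0, not the real table offset):

#include <stdio.h>

#define IEEE80211_EHT_MAX_STREAMS 8
#define EHT_GROUP_0 0	/* stand-in for IEEE80211_EHT_GROUP_0 */

static int eht_group_idx(int streams, int gi, int bw)
{
	return EHT_GROUP_0 +
	       IEEE80211_EHT_MAX_STREAMS * 3 * bw +	/* 3 GI choices per bw */
	       IEEE80211_EHT_MAX_STREAMS * gi +
	       (streams - 1);
}

int main(void)
{
	/* in the diff's numbering, BW_320 == 4 and EHT_GI_32 == 2 */
	printf("8ss, GI 3.2us, 320 MHz -> group %d\n", eht_group_idx(8, 2, 4));
	/* 5 bandwidths * 3 GIs * 8 streams = 120 EHT groups in total */
	printf("total EHT groups: %d\n", 5 * 3 * IEEE80211_EHT_MAX_STREAMS);
	return 0;
}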
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b02b84ce2130..847304a3a29a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1662,12 +1662,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
ieee80211_link_info_change_notify(sdata, link,
BSS_CHANGED_BEACON_ENABLED);
- if (sdata->wdev.cac_started) {
+ if (sdata->wdev.links[link_id].cac_started) {
chandef = link_conf->chanreq.oper;
- wiphy_delayed_work_cancel(wiphy, &sdata->dfs_cac_timer_work);
+ wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, link_id);
}
drv_stop_ap(sdata->local, sdata, link_conf);
@@ -3462,51 +3462,58 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
static int ieee80211_start_radar_detection(struct wiphy *wiphy,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms)
+ u32 cac_time_ms, int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_chan_req chanreq = { .oper = *chandef };
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
int err;
lockdep_assert_wiphy(local->hw.wiphy);
- if (!list_empty(&local->roc_list) || local->scanning) {
- err = -EBUSY;
- goto out_unlock;
- }
+ if (!list_empty(&local->roc_list) || local->scanning)
+ return -EBUSY;
+
+ link_data = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link_data)
+ return -ENOLINK;
/* whatever, but channel contexts should not complain about that one */
- sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
- sdata->deflink.needed_rx_chains = local->rx_chains;
+ link_data->smps_mode = IEEE80211_SMPS_OFF;
+ link_data->needed_rx_chains = local->rx_chains;
- err = ieee80211_link_use_channel(&sdata->deflink, &chanreq,
+ err = ieee80211_link_use_channel(link_data, &chanreq,
IEEE80211_CHANCTX_SHARED);
if (err)
- goto out_unlock;
+ return err;
- wiphy_delayed_work_queue(wiphy, &sdata->dfs_cac_timer_work,
+ wiphy_delayed_work_queue(wiphy, &link_data->dfs_cac_timer_work,
msecs_to_jiffies(cac_time_ms));
- out_unlock:
- return err;
+ return 0;
}
static void ieee80211_end_cac(struct wiphy *wiphy,
- struct net_device *dev)
+ struct net_device *dev, unsigned int link_id)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_link_data *link_data;
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
+ link_data = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link_data)
+ continue;
+
wiphy_delayed_work_cancel(wiphy,
- &sdata->dfs_cac_timer_work);
+ &link_data->dfs_cac_timer_work);
- if (sdata->wdev.cac_started) {
- ieee80211_link_release_channel(&sdata->deflink);
- sdata->wdev.cac_started = false;
+ if (sdata->wdev.links[link_id].cac_started) {
+ ieee80211_link_release_channel(link_data);
+ sdata->wdev.links[link_id].cac_started = false;
}
}
}
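The hunks above replace the single per-interface CAC state (sdata->wdev.cac_started and the default link) with per-link state, so every helper now resolves a link by ID and skips absent links. A simplified, runnable sketch of that shape, with stand-in types:

#include <stdbool.h>
#include <stdio.h>

#define MAX_LINKS 4

struct demo_link {
	int id;
	bool cac_started;
};

struct demo_iface {
	struct demo_link *link[MAX_LINKS];	/* NULL when the link is unused */
};

static void end_cac(struct demo_iface *ifc, int link_id)
{
	struct demo_link *l = ifc->link[link_id];

	if (!l)			/* the -ENOLINK case in the real code */
		return;
	if (l->cac_started) {
		l->cac_started = false;
		printf("CAC ended on link %d\n", l->id);
	}
}

int main(void)
{
	struct demo_link l0 = { 0, true }, l2 = { 2, true };
	struct demo_iface ifc = { .link = { &l0, NULL, &l2, NULL } };

	for (int id = 0; id < MAX_LINKS; id++)
		end_cac(&ifc, id);
	return 0;
}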
@@ -3961,7 +3968,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (!list_empty(&local->roc_list) || local->scanning)
return -EBUSY;
- if (sdata->wdev.cac_started)
+ if (sdata->wdev.links[link_id].cac_started)
return -EBUSY;
if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS))
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e8567723e94d..cca6d14084d2 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -286,7 +286,9 @@ ieee80211_get_max_required_bw(struct ieee80211_link_data *link)
enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT;
struct sta_info *sta;
- list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+ lockdep_assert_wiphy(sdata->local->hw.wiphy);
+
+ list_for_each_entry(sta, &sdata->local->sta_list, list) {
if (sdata != sta->sdata &&
!(sta->sdata->bss && sta->sdata->bss == sdata->bss))
continue;
@@ -681,6 +683,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local,
ctx->mode = mode;
ctx->conf.radar_enabled = false;
ctx->conf.radio_idx = radio_idx;
+ ctx->radar_detected = false;
_ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false);
return ctx;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 79caeb485fd5..1c2b7dd8976a 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -467,20 +467,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
return;
skb_reserve(skb, local->hw.extra_tx_headroom);
- mgmt = skb_put_zero(skb, 24);
- memcpy(mgmt->da, da, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- if (sdata->vif.type == NL80211_IFTYPE_AP ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_STATION)
- memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN);
- else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
- memcpy(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN);
-
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
- IEEE80211_STYPE_ACTION);
+ mgmt = ieee80211_mgmt_ba(skb, da, sdata);
skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba));
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a3485e4c6132..4f0390918b60 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -893,6 +893,8 @@ struct ieee80211_chanctx {
struct ieee80211_chan_req req;
struct ieee80211_chanctx_conf conf;
+
+ bool radar_detected;
};
struct mac80211_qos_map {
@@ -1067,6 +1069,7 @@ struct ieee80211_link_data {
int ap_power_level; /* in dBm */
bool radar_required;
+ struct wiphy_delayed_work dfs_cac_timer_work;
union {
struct ieee80211_link_data_managed mgd;
@@ -1165,8 +1168,6 @@ struct ieee80211_sub_if_data {
struct ieee80211_link_data deflink;
struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS];
- struct wiphy_delayed_work dfs_cac_timer_work;
-
/* for ieee80211_set_active_links_async() */
struct wiphy_work activate_links_work;
u16 desired_active_links;
@@ -2072,8 +2073,6 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
u32 info_flags,
u32 ctrl_flags,
u64 *cookie);
-void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
- struct sk_buff_head *skbs);
struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
@@ -2136,6 +2135,29 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
struct ieee80211_mgmt *mgmt,
size_t len);
+static inline struct ieee80211_mgmt *
+ieee80211_mgmt_ba(struct sk_buff *skb, const u8 *da,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_mgmt *mgmt = skb_put_zero(skb, 24);
+
+ ether_addr_copy(mgmt->da, da);
+ ether_addr_copy(mgmt->sa, sdata->vif.addr);
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP ||
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
+ ether_addr_copy(mgmt->bssid, sdata->vif.addr);
+ else if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ ether_addr_copy(mgmt->bssid, sdata->vif.cfg.ap_addr);
+ else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
+ ether_addr_copy(mgmt->bssid, sdata->u.ibss.bssid);
+
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ return mgmt;
+}
+
int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
enum ieee80211_agg_stop_reason reason);
void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
@@ -2629,7 +2651,8 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
bool ieee80211_is_radar_required(struct ieee80211_local *local);
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work);
-void ieee80211_dfs_cac_cancel(struct ieee80211_local *local);
+void ieee80211_dfs_cac_cancel(struct ieee80211_local *local,
+ struct ieee80211_chanctx *chanctx);
void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
struct wiphy_work *work);
int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b4ad66af3af3..6ef0990d3d29 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -462,6 +462,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
{
struct ieee80211_local *local = sdata->local;
unsigned long flags;
+ struct sk_buff_head freeq;
struct sk_buff *skb, *tmp;
u32 hw_reconf_flags = 0;
int i, flushed;
@@ -550,15 +551,15 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
wiphy_work_cancel(local->hw.wiphy,
&sdata->deflink.color_change_finalize_work);
wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->dfs_cac_timer_work);
+ &sdata->deflink.dfs_cac_timer_work);
- if (sdata->wdev.cac_started) {
+ if (sdata->wdev.links[0].cac_started) {
chandef = sdata->vif.bss_conf.chanreq.oper;
WARN_ON(local->suspended);
ieee80211_link_release_channel(&sdata->deflink);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, 0);
}
if (sdata->vif.type == NL80211_IFTYPE_AP) {
@@ -637,18 +638,32 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
skb_queue_purge(&sdata->status_queue);
}
+ /*
+ * Since ieee80211_free_txskb() may issue __dev_queue_xmit()
+ * which should be called with interrupts enabled, reclamation
+ * is done in two phases:
+ */
+ __skb_queue_head_init(&freeq);
+
+ /* unlink from local queues... */
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_walk_safe(&local->pending[i], skb, tmp) {
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
if (info->control.vif == &sdata->vif) {
__skb_unlink(skb, &local->pending[i]);
- ieee80211_free_txskb(&local->hw, skb);
+ __skb_queue_tail(&freeq, skb);
}
}
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ /* ... and perform actual reclamation with interrupts enabled. */
+ skb_queue_walk_safe(&freeq, skb, tmp) {
+ __skb_unlink(skb, &freeq);
+ ieee80211_free_txskb(&local->hw, skb);
+ }
+
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
ieee80211_txq_remove_vlan(local, sdata);
@@ -1729,8 +1744,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
wiphy_work_init(&sdata->work, ieee80211_iface_work);
wiphy_work_init(&sdata->activate_links_work,
ieee80211_activate_links_work);
- wiphy_delayed_work_init(&sdata->dfs_cac_timer_work,
- ieee80211_dfs_cac_timer_work);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
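The two-phase purge added to ieee80211_do_stop() is a general pattern: unlink matching entries to a private list while holding the lock, then do the actual freeing after dropping it, because ieee80211_free_txskb() may re-enter the stack via __dev_queue_xmit(), which must not run with interrupts disabled. A runnable userspace analogue, where a pthread mutex stands in for the IRQ-disabled spinlock:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	int owner;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pending;

static void purge_owner(int owner)
{
	struct node *freeq = NULL, **pp, *n;

	/* phase 1: unlink under the lock, no freeing yet */
	pthread_mutex_lock(&queue_lock);
	for (pp = &pending; (n = *pp) != NULL; ) {
		if (n->owner == owner) {
			*pp = n->next;
			n->next = freeq;
			freeq = n;
		} else {
			pp = &n->next;
		}
	}
	pthread_mutex_unlock(&queue_lock);

	/* phase 2: reclaim outside the lock, where "sleeping" work is safe */
	while ((n = freeq) != NULL) {
		freeq = n->next;
		free(n);
	}
}

int main(void)
{
	for (int i = 0; i < 6; i++) {
		struct node *n = malloc(sizeof(*n));

		n->owner = i % 2;
		n->next = pending;
		pending = n;
	}
	purge_owner(0);
	for (struct node *n = pending; n; n = n->next)
		printf("kept owner %d\n", n->owner);
	return 0;
}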
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index 1a211b8d4057..0bbac64d5fa0 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -45,6 +45,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
ieee80211_color_collision_detection_work);
INIT_LIST_HEAD(&link->assigned_chanctx_list);
INIT_LIST_HEAD(&link->reserved_chanctx_list);
+ wiphy_delayed_work_init(&link->dfs_cac_timer_work,
+ ieee80211_dfs_cac_timer_work);
if (!deflink) {
switch (sdata->vif.type) {
@@ -75,6 +77,16 @@ void ieee80211_link_stop(struct ieee80211_link_data *link)
&link->color_change_finalize_work);
wiphy_work_cancel(link->sdata->local->hw.wiphy,
&link->csa.finalize_work);
+
+ if (link->sdata->wdev.links[link->link_id].cac_started) {
+ wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy,
+ &link->dfs_cac_timer_work);
+ cfg80211_cac_event(link->sdata->dev,
+ &link->conf->chanreq.oper,
+ NL80211_RADAR_CAC_ABORTED,
+ GFP_KERNEL, link->link_id);
+ }
+
ieee80211_link_release_channel(link);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a3104b6ea6f0..89084690350f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1051,9 +1051,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
return 0;
/* Driver provides cipher suites, but we need to exclude WEP */
- suites = kmemdup(local->hw.wiphy->cipher_suites,
- sizeof(u32) * local->hw.wiphy->n_cipher_suites,
- GFP_KERNEL);
+ suites = kmemdup_array(local->hw.wiphy->cipher_suites,
+ local->hw.wiphy->n_cipher_suites,
+ sizeof(u32), GFP_KERNEL);
if (!suites)
return -ENOMEM;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index c0a5c75cddcb..30c0d89203af 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -580,7 +580,7 @@ void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata,
prev = rhashtable_lookup_get_insert_fast(&cache->rht,
&entry->rhash,
fast_tx_rht_params);
- if (unlikely(IS_ERR(prev))) {
+ if (IS_ERR(prev)) {
kfree(entry);
goto unlock_cache;
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f9526bbc3633..735e78adb0db 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1231,7 +1231,7 @@ static bool ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
bool disable_mu_mimo = false;
struct ieee80211_sub_if_data *other;
- list_for_each_entry_rcu(other, &local->interfaces, list) {
+ list_for_each_entry(other, &local->interfaces, list) {
if (other->vif.bss_conf.mu_mimo_owner) {
disable_mu_mimo = true;
break;
@@ -3031,18 +3031,19 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t)
void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work)
{
- struct ieee80211_sub_if_data *sdata =
- container_of(work, struct ieee80211_sub_if_data,
+ struct ieee80211_link_data *link =
+ container_of(work, struct ieee80211_link_data,
dfs_cac_timer_work.work);
- struct cfg80211_chan_def chandef = sdata->vif.bss_conf.chanreq.oper;
+ struct cfg80211_chan_def chandef = link->conf->chanreq.oper;
+ struct ieee80211_sub_if_data *sdata = link->sdata;
lockdep_assert_wiphy(sdata->local->hw.wiphy);
- if (sdata->wdev.cac_started) {
- ieee80211_link_release_channel(&sdata->deflink);
+ if (sdata->wdev.links[link->link_id].cac_started) {
+ ieee80211_link_release_channel(link);
cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_FINISHED,
- GFP_KERNEL);
+ GFP_KERNEL, link->link_id);
}
}
@@ -4715,7 +4716,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
((assoc_data->wmm && !elems->wmm_param) ||
(link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT &&
(!elems->ht_cap_elem || !elems->ht_operation)) ||
- (link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
+ (is_5ghz && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT &&
(!elems->vht_cap_elem || !elems->vht_operation)))) {
const struct cfg80211_bss_ies *ies;
struct ieee802_11_elems *bss_elems;
@@ -4763,19 +4764,22 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
sdata_info(sdata,
"AP bug: HT operation missing from AssocResp\n");
}
- if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
- link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
- elems->vht_cap_elem = bss_elems->vht_cap_elem;
- sdata_info(sdata,
- "AP bug: VHT capa missing from AssocResp\n");
- }
- if (!elems->vht_operation && bss_elems->vht_operation &&
- link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
- elems->vht_operation = bss_elems->vht_operation;
- sdata_info(sdata,
- "AP bug: VHT operation missing from AssocResp\n");
- }
+ if (is_5ghz) {
+ if (!elems->vht_cap_elem && bss_elems->vht_cap_elem &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
+ elems->vht_cap_elem = bss_elems->vht_cap_elem;
+ sdata_info(sdata,
+ "AP bug: VHT capa missing from AssocResp\n");
+ }
+
+ if (!elems->vht_operation && bss_elems->vht_operation &&
+ link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_VHT) {
+ elems->vht_operation = bss_elems->vht_operation;
+ sdata_info(sdata,
+ "AP bug: VHT operation missing from AssocResp\n");
+ }
+ }
kfree(bss_elems);
}
@@ -7660,6 +7664,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
lockdep_assert_wiphy(sdata->local->hw.wiphy);
assoc_data->tries++;
+ assoc_data->comeback = false;
if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) {
sdata_info(sdata, "association with %pM timed out\n",
assoc_data->ap_addr);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 28d03196ef75..29fab7ae47b4 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -997,6 +997,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
}
IEEE80211_SKB_CB(skb)->flags = flags;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK;
skb->dev = sdata->dev;
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d823d58303e8..7be52345f218 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -32,7 +32,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
ieee80211_scan_cancel(local);
- ieee80211_dfs_cac_cancel(local);
+ ieee80211_dfs_cac_cancel(local, NULL);
ieee80211_roc_purge(local, NULL);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 4dc1def69548..3dc9752188d5 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -890,7 +890,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
- if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX))
+ if (!(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK))
mask = sdata->rc_rateidx_mask[info->band];
if (dest[0].idx < 0)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index b5f2df61c7f6..adb88c06b598 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -504,7 +504,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
* the scan was in progress; if there was none this will
* just be a no-op for the particular interface.
*/
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ list_for_each_entry(sdata, &local->interfaces, list) {
if (ieee80211_sdata_running(sdata))
wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
@@ -575,6 +575,7 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_sub_if_data *sdata_iter;
+ unsigned int link_id;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -585,8 +586,9 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
return false;
list_for_each_entry(sdata_iter, &local->interfaces, list) {
- if (sdata_iter->wdev.cac_started)
- return false;
+ for_each_valid_link(&sdata_iter->wdev, link_id)
+ if (sdata_iter->wdev.links[link_id].cac_started)
+ return false;
}
return true;
@@ -649,7 +651,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata,
cpu_to_le16(IEEE80211_SN_TO_SEQ(sn));
}
IEEE80211_SKB_CB(skb)->flags |= tx_flags;
- IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX;
+ IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK;
ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band);
}
}
@@ -1013,10 +1015,8 @@ set_channel:
*/
if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) ||
!scan_req->n_ssids) {
- *next_delay = msecs_to_jiffies(scan_req->duration) >
- IEEE80211_PASSIVE_CHANNEL_TIME ?
- msecs_to_jiffies(scan_req->duration) :
- IEEE80211_PASSIVE_CHANNEL_TIME;
+ *next_delay = max(msecs_to_jiffies(scan_req->duration),
+ IEEE80211_PASSIVE_CHANNEL_TIME);
local->next_scan_state = SCAN_DECISION;
if (scan_req->n_ssids)
set_bit(SCAN_BEACON_WAIT, &local->scanning);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index dd8f857a1fbc..d1cf987de13b 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -1301,3 +1301,4 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
while ((skb = __skb_dequeue(skbs)))
ieee80211_free_txskb(hw, skb);
}
+EXPORT_SYMBOL(ieee80211_purge_tx_queue);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index bca7b341dd77..a9ee86982259 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -699,7 +699,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
- if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) {
+ if (unlikely(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) {
txrc.rate_idx_mask = ~0;
} else {
txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c7ad9bc5973a..f94faa86ba8a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -751,7 +751,9 @@ static void __iterate_interfaces(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata;
bool active_only = iter_flags & IEEE80211_IFACE_ITER_ACTIVE;
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ list_for_each_entry_rcu(sdata, &local->interfaces, list,
+ lockdep_is_held(&local->iflist_mtx) ||
+ lockdep_is_held(&local->hw.wiphy->mtx)) {
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
@@ -833,7 +835,8 @@ static void __iterate_stations(struct ieee80211_local *local,
{
struct sta_info *sta;
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ list_for_each_entry_rcu(sta, &local->sta_list, list,
+ lockdep_is_held(&local->hw.wiphy->mtx)) {
if (!sta->uploaded)
continue;
@@ -854,6 +857,19 @@ void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_atomic);
+void ieee80211_iterate_stations_mtx(struct ieee80211_hw *hw,
+ void (*iterator)(void *data,
+ struct ieee80211_sta *sta),
+ void *data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ __iterate_stations(local, iterator, data);
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_stations_mtx);
+
struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
@@ -3451,24 +3467,44 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
return ts;
}
-void ieee80211_dfs_cac_cancel(struct ieee80211_local *local)
+/* Cancel CAC for the interfaces under the specified @local. If @ctx is
+ * also provided, only the interfaces using that ctx will be canceled.
+ */
+void ieee80211_dfs_cac_cancel(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
struct ieee80211_sub_if_data *sdata;
struct cfg80211_chan_def chandef;
+ struct ieee80211_link_data *link;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ unsigned int link_id;
lockdep_assert_wiphy(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
- wiphy_delayed_work_cancel(local->hw.wiphy,
- &sdata->dfs_cac_timer_work);
-
- if (sdata->wdev.cac_started) {
- chandef = sdata->vif.bss_conf.chanreq.oper;
- ieee80211_link_release_channel(&sdata->deflink);
- cfg80211_cac_event(sdata->dev,
- &chandef,
+ for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS;
+ link_id++) {
+ link = sdata_dereference(sdata->link[link_id],
+ sdata);
+ if (!link)
+ continue;
+
+ chanctx_conf = sdata_dereference(link->conf->chanctx_conf,
+ sdata);
+ if (ctx && &ctx->conf != chanctx_conf)
+ continue;
+
+ wiphy_delayed_work_cancel(local->hw.wiphy,
+ &link->dfs_cac_timer_work);
+
+ if (!sdata->wdev.links[link_id].cac_started)
+ continue;
+
+ chandef = link->conf->chanreq.oper;
+ ieee80211_link_release_channel(link);
+ cfg80211_cac_event(sdata->dev, &chandef,
NL80211_RADAR_CAC_ABORTED,
- GFP_KERNEL);
+ GFP_KERNEL, link_id);
}
}
}
@@ -3478,9 +3514,8 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, radar_detected_work);
- struct cfg80211_chan_def chandef = local->hw.conf.chandef;
+ struct cfg80211_chan_def chandef;
struct ieee80211_chanctx *ctx;
- int num_chanctx = 0;
lockdep_assert_wiphy(local->hw.wiphy);
@@ -3488,25 +3523,46 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy,
if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
continue;
- num_chanctx++;
+ if (!ctx->radar_detected)
+ continue;
+
+ ctx->radar_detected = false;
+
chandef = ctx->conf.def;
+
+ ieee80211_dfs_cac_cancel(local, ctx);
+ cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
}
+}
- ieee80211_dfs_cac_cancel(local);
+static void
+ieee80211_radar_mark_chan_ctx_iterator(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf,
+ void *data)
+{
+ struct ieee80211_chanctx *ctx =
+ container_of(chanctx_conf, struct ieee80211_chanctx,
+ conf);
- if (num_chanctx > 1)
- /* XXX: multi-channel is not supported yet */
- WARN_ON(1);
- else
- cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
+ if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER)
+ return;
+
+ if (data && data != chanctx_conf)
+ return;
+
+ ctx->radar_detected = true;
}
-void ieee80211_radar_detected(struct ieee80211_hw *hw)
+void ieee80211_radar_detected(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *chanctx_conf)
{
struct ieee80211_local *local = hw_to_local(hw);
trace_api_radar_detected(local);
+ ieee80211_iter_chan_contexts_atomic(hw, ieee80211_radar_mark_chan_ctx_iterator,
+ chanctx_conf);
+
wiphy_work_queue(hw->wiphy, &local->radar_detected_work);
}
EXPORT_SYMBOL(ieee80211_radar_detected);
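The radar rework above drops the old single-channel assumption (the "multi-channel is not supported yet" WARN_ON): the detection path now marks the affected channel context(s), and the deferred work processes each marked context independently. A runnable sketch of that mark-then-process scheme, with simplified stand-in types:

#include <stdbool.h>
#include <stdio.h>

struct demo_chanctx {
	int freq;
	bool radar_detected;
};

static struct demo_chanctx ctxs[] = {
	{ 5260, false }, { 5500, false }, { 5580, false },
};

/* detection path: mark one context, or all when the context is unknown */
static void radar_detected(struct demo_chanctx *ctx)
{
	for (size_t i = 0; i < sizeof(ctxs) / sizeof(ctxs[0]); i++)
		if (!ctx || ctx == &ctxs[i])
			ctxs[i].radar_detected = true;
}

/* deferred work: act only on marked contexts, then clear the mark */
static void radar_work(void)
{
	for (size_t i = 0; i < sizeof(ctxs) / sizeof(ctxs[0]); i++) {
		if (!ctxs[i].radar_detected)
			continue;
		ctxs[i].radar_detected = false;
		printf("radar event on %d MHz\n", ctxs[i].freq);
	}
}

int main(void)
{
	radar_detected(&ctxs[1]);	/* driver knows the affected context */
	radar_work();			/* only 5500 MHz is reported */
	return 0;
}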
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index de52a9191da0..43288b408fde 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -486,6 +486,9 @@ static int mctp_ioctl_droptag(struct mctp_sock *msk, bool tagv2,
tag = ctl.tag & MCTP_TAG_MASK;
rc = -EINVAL;
+ if (ctl.peer_addr == MCTP_ADDR_NULL)
+ ctl.peer_addr = MCTP_ADDR_ANY;
+
spin_lock_irqsave(&net->mctp.keys_lock, flags);
hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
/* we do an irqsave here, even though we know the irq state,
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 0e6c94a8c2bc..aba983531ed3 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1201,8 +1201,7 @@ static void mpls_netconf_notify_devconf(struct net *net, int event,
rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
+ rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
}
static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
@@ -2278,8 +2277,7 @@ static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}
static int mpls_valid_getroute_req(struct sk_buff *skb,
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 4385fd3b13be..6e73da94af7f 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -106,7 +106,7 @@ static int mpls_xmit(struct sk_buff *skb)
hh_len = 0;
/* Ensure there is enough space for the headers in the skb */
- if (skb_cow(skb, hh_len + new_header_size))
+ if (skb_cow_head(skb, hh_len + new_header_size))
goto drop;
skb_set_inner_protocol(skb, skb->protocol);
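The one-line fix above switches mpls_xmit() from skb_cow() to skb_cow_head(): since only a header is being prepended, it is enough to guarantee writable headroom, and skb_cow_head() avoids copying a cloned buffer whose header area is not actually shared. A runnable userspace analogue of the "ensure headroom, then prepend" idea, using a simplified buffer model rather than real skbs:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_buf {
	unsigned char *base;	/* start of the allocation */
	unsigned char *data;	/* start of the payload */
	size_t len;		/* payload length */
};

/* grow headroom to at least "need" bytes, moving the payload if required */
static int ensure_headroom(struct demo_buf *b, size_t need)
{
	size_t headroom = (size_t)(b->data - b->base);
	unsigned char *nbase;

	if (headroom >= need)
		return 0;	/* common fast path: nothing to do */

	nbase = malloc(need + b->len);
	if (!nbase)
		return -1;
	memcpy(nbase + need, b->data, b->len);
	free(b->base);
	b->base = nbase;
	b->data = nbase + need;
	return 0;
}

int main(void)
{
	struct demo_buf b = { .base = malloc(64), .len = 8 };

	b.data = b.base + 4;		/* only 4 bytes of headroom */
	memcpy(b.data, "payload", 8);
	if (ensure_headroom(&b, 12) == 0) {
		b.data -= 12;		/* prepend a 12-byte header */
		memset(b.data, 0, 12);
		printf("ok, payload: %s\n", b.data + 12);
	}
	free(b.base);
	return 0;
}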
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 99382c317ebb..38d8121331d4 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -12,6 +12,7 @@
#include <net/netns/generic.h>
#include "protocol.h"
+#include "mib.h"
#define MPTCP_SYSCTL_PATH "net/mptcp"
@@ -27,8 +28,11 @@ struct mptcp_pernet {
#endif
unsigned int add_addr_timeout;
+ unsigned int blackhole_timeout;
unsigned int close_timeout;
unsigned int stale_loss_cnt;
+ atomic_t active_disable_times;
+ unsigned long active_disable_stamp;
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
@@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->blackhole_timeout = 3600;
+ atomic_set(&pernet->active_disable_times, 0);
pernet->close_timeout = TCP_TIMEWAIT_LEN;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
@@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
return ret;
}
+static int proc_blackhole_detect_timeout(const struct ctl_table *table,
+ int write, void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ atomic_set(&pernet->active_disable_times, 0);
+
+ return ret;
+}
+
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
@@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "blackhole_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_blackhole_detect_timeout,
+ .extra1 = SYSCTL_ZERO,
+ },
};
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
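Note the side effect encoded in proc_blackhole_detect_timeout() above: any successful write to the new blackhole_timeout sysctl also clears the accumulated disable counter, so an admin can retune the timeout and "forgive" past detections in one step. A trivial runnable sketch of just that semantics, with the procfs plumbing stubbed out:

#include <stdio.h>

static unsigned int blackhole_timeout = 3600;
static int active_disable_times = 2;	/* pretend two detections happened */

static int set_blackhole_timeout(unsigned int val)
{
	blackhole_timeout = val;
	active_disable_times = 0;	/* mirror the write side effect */
	return 0;
}

int main(void)
{
	set_blackhole_timeout(7200);
	printf("timeout=%u disable_times=%d\n",
	       blackhole_timeout, active_disable_times);
	return 0;
}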
@@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[6].data = &pernet->scheduler;
/* table[7] is for available_schedulers which is read-only info */
table[8].data = &pernet->close_timeout;
+ table[9].data = &pernet->blackhole_timeout;
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
ARRAY_SIZE(mptcp_sysctl_table));
@@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
#endif /* CONFIG_SYSCTL */
+/* The following code block is to deal with middle box issues with MPTCP,
+ * similar to what is done with TFO.
+ * The proposed solution is to disable active MPTCP globally when SYNs
+ * carrying the MPC option are dropped while plain SYNs are not. In this
+ * case, active-side MPTCP is disabled globally for 1h at first. Then, if
+ * it happens again, it is disabled for 2h, then 4h, 8h, ...
+ * The timeout is reset back to 1h when a successful active MPTCP
+ * connection is fully established.
+ */
+
+/* Disable active MPTCP, record the current jiffies and bump active_disable_times */
+void mptcp_active_disable(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ struct mptcp_pernet *pernet;
+
+ pernet = mptcp_get_pernet(net);
+
+ if (!READ_ONCE(pernet->blackhole_timeout))
+ return;
+
+ /* Paired with READ_ONCE() in mptcp_active_should_disable() */
+ WRITE_ONCE(pernet->active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in mptcp_active_should_disable().
+ * We want pernet->active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
+ atomic_inc(&pernet->active_disable_times);
+
+ MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
+}
+
+/* Calculate timeout for MPTCP active disable
+ * Return true if we are still in the active MPTCP disable period
+ * Return false if timeout already expired and we should use active MPTCP
+ */
+bool mptcp_active_should_disable(struct sock *ssk)
+{
+ struct net *net = sock_net(ssk);
+ unsigned int blackhole_timeout;
+ struct mptcp_pernet *pernet;
+ unsigned long timeout;
+ int disable_times;
+ int multiplier;
+
+ pernet = mptcp_get_pernet(net);
+ blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
+
+ if (!blackhole_timeout)
+ return false;
+
+ disable_times = atomic_read(&pernet->active_disable_times);
+ if (!disable_times)
+ return false;
+
+ /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
+ smp_rmb();
+
+ /* Limit timeout to max: 2^6 * initial timeout */
+ multiplier = 1 << min(disable_times - 1, 6);
+
+ /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
+ timeout = READ_ONCE(pernet->active_disable_stamp) +
+ multiplier * blackhole_timeout * HZ;
+
+ return time_before(jiffies, timeout);
+}
+
+/* Enable active MPTCP and reset active_disable_times if needed */
+void mptcp_active_enable(struct sock *sk)
+{
+ struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
+
+ if (atomic_read(&pernet->active_disable_times)) {
+ struct dst_entry *dst = sk_dst_get(sk);
+
+ if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
+ atomic_set(&pernet->active_disable_times, 0);
+ }
+}
+
+/* Check the number of retransmissions, and fall back to TCP if needed */
+void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
+{
+ struct mptcp_subflow_context *subflow;
+ u32 timeouts;
+
+ if (!sk_is_mptcp(ssk))
+ return;
+
+ timeouts = inet_csk(ssk)->icsk_retransmits;
+ subflow = mptcp_subflow_ctx(ssk);
+
+ if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
+ if (timeouts == 2 || (timeouts < 2 && expired)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
+ subflow->mpc_drop = 1;
+ mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
+ } else {
+ subflow->mpc_drop = 0;
+ }
+ }
+}
+
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
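A runnable check of the backoff window math used in mptcp_active_should_disable() above: the disable period doubles per detection and is capped at 2^6 times the base timeout (3600s by default), and a fully established MPC connection resets the counter. Plain C, with the pernet state reduced to two integers:

#include <stdio.h>

static unsigned long disable_window(int disable_times,
				    unsigned long base_timeout)
{
	int shift = disable_times - 1;

	if (shift > 6)		/* cap: at most 2^6 * base */
		shift = 6;
	return (1UL << shift) * base_timeout;
}

int main(void)
{
	for (int times = 1; times <= 8; times++)
		printf("detection %d -> disabled for %lus\n",
		       times, disable_window(times, 3600));
	/* prints 3600, 7200, ... capped at 230400 (64h) from the 7th on */
	return 0;
}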
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 7884217f33eb..38c2efc82b94 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -15,6 +15,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+ SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
+ SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -25,6 +27,10 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+ SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX),
+ SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR),
+ SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR),
+ SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
@@ -69,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
+ SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 66aa67f49d03..c8ffe18a8722 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -10,6 +10,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
+ MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to an MPC drop */
+ MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -20,6 +22,10 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
+ MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNTXCONNECTERR, /* Not able to connect() when sending a SYN + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
@@ -70,6 +76,7 @@ enum linux_mptcp_mib_field {
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
+ MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 37f6dbcd8434..620264c75dc2 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -430,17 +430,6 @@ bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_is_backup(msk, &skc_local);
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- *flags = 0;
- *ifindex = 0;
-
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
- return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
-}
-
int mptcp_pm_get_addr(struct sk_buff *skb, struct genl_info *info)
{
if (info->attrs[MPTCP_PM_ATTR_TOKEN])
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index ad935d34c973..64fe0e7d87d7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -149,7 +149,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
static bool
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *new_entry)
+ struct mptcp_pm_local *new_local)
{
struct mptcp_pm_addr_entry *entry;
bool found = false;
@@ -164,7 +164,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- *new_entry = *entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
found = true;
break;
}
@@ -175,7 +177,7 @@ select_local_address(const struct pm_nl_pernet *pernet,
static bool
select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *new_entry)
+ struct mptcp_pm_local *new_local)
{
struct mptcp_pm_addr_entry *entry;
bool found = false;
@@ -193,7 +195,9 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
continue;
- *new_entry = *entry;
+ new_local->addr = entry->addr;
+ new_local->flags = entry->flags;
+ new_local->ifindex = entry->ifindex;
found = true;
break;
}
@@ -530,11 +534,11 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
- struct mptcp_pm_addr_entry local;
unsigned int add_addr_signal_max;
bool signal_and_subflow = false;
unsigned int local_addr_max;
struct pm_nl_pernet *pernet;
+ struct mptcp_pm_local local;
unsigned int subflows_max;
pernet = pm_nl_get_pernet(sock_net(sk));
@@ -635,7 +639,7 @@ subflow:
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- __mptcp_subflow_connect(sk, &local.addr, &addrs[i]);
+ __mptcp_subflow_connect(sk, &local, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
}
mptcp_pm_nl_check_work_pending(msk);
@@ -656,7 +660,7 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info *remote,
- struct mptcp_addr_info *addrs)
+ struct mptcp_pm_local *locals)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
@@ -679,13 +683,15 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
continue;
if (msk->pm.subflows < subflows_max) {
- msk->pm.subflows++;
- addrs[i] = entry->addr;
+ locals[i].addr = entry->addr;
+ locals[i].flags = entry->flags;
+ locals[i].ifindex = entry->ifindex;
/* Special case for ID0: set the correct ID */
- if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port))
- addrs[i].id = 0;
+ if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port))
+ locals[i].addr.id = 0;
+ msk->pm.subflows++;
i++;
}
}
@@ -695,21 +701,19 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
- struct mptcp_addr_info local;
-
- memset(&local, 0, sizeof(local));
- local.family =
+ memset(&locals[i], 0, sizeof(locals[i]));
+ locals[i].addr.family =
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
remote->family == AF_INET6 &&
ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
#endif
remote->family;
- if (!mptcp_pm_addr_families_match(sk, &local, remote))
+ if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote))
return 0;
msk->pm.subflows++;
- addrs[i++] = local;
+ i++;
}
return i;
@@ -717,7 +721,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
- struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
@@ -746,13 +750,13 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- nr = fill_local_addresses_vec(msk, &remote, addrs);
+ nr = fill_local_addresses_vec(msk, &remote, locals);
if (nr == 0)
return;
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
- if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0)
+ if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0)
sf_created = true;
spin_lock_bh(&msk->pm.lock);
@@ -1439,28 +1443,6 @@ out_free:
return ret;
}
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *entry;
- struct sock *sk = (struct sock *)msk;
- struct net *net = sock_net(sk);
-
- /* No entries with ID 0 */
- if (id == 0)
- return 0;
-
- rcu_read_lock();
- entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
- if (entry) {
- *flags = entry->flags;
- *ifindex = entry->ifindex;
- }
- rcu_read_unlock();
-
- return 0;
-}
-
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
@@ -1677,8 +1659,8 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list)
}
/* Called from the in-kernel PM only */
-static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
@@ -1706,8 +1688,8 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
spin_unlock_bh(&msk->pm.lock);
}
-static void mptcp_nl_remove_addrs_list(struct net *net,
- struct list_head *rm_list)
+static void mptcp_nl_flush_addrs_list(struct net *net,
+ struct list_head *rm_list)
{
long s_slot = 0, s_num = 0;
struct mptcp_sock *msk;
@@ -1720,7 +1702,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
if (!mptcp_pm_is_userspace(msk)) {
lock_sock(sk);
- mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ mptcp_pm_flush_addrs_and_subflows(msk, rm_list);
release_sock(sk);
}
@@ -1761,7 +1743,7 @@ int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info)
pernet->next_id = 1;
bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
- mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list);
synchronize_rcu();
__flush_addrs(&free_list);
return 0;
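The pm_netlink refactor above introduces struct mptcp_pm_local so that __mptcp_subflow_connect() receives the address together with its flags and ifindex, instead of resolving them later by ID (the removed *_get_flags_and_ifindex_by_id() helpers). A runnable sketch of the grouping, with stand-in types:

#include <stdio.h>

struct demo_addr {
	unsigned char id;
	int port;
};

/* mirrors struct mptcp_pm_local from the diff */
struct demo_pm_local {
	struct demo_addr addr;
	unsigned char flags;
	int ifindex;
};

static int subflow_connect(const struct demo_pm_local *local)
{
	/* everything needed travels together: no second lookup, and no
	 * window where the entry could change between lookup and use */
	printf("connect id=%u flags=%#x ifindex=%d\n",
	       (unsigned)local->addr.id, (unsigned)local->flags,
	       local->ifindex);
	return 0;
}

int main(void)
{
	struct demo_pm_local local = {
		.addr = { .id = 1, .port = 0 },
		.flags = 0x02,	/* e.g. a SUBFLOW-style flag */
		.ifindex = 3,
	};

	return subflow_connect(&local);
}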
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8eaa9fbe3e34..2cceded3a83a 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -119,23 +119,6 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id)
return NULL;
}
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex)
-{
- struct mptcp_pm_addr_entry *match;
-
- spin_lock_bh(&msk->pm.lock);
- match = mptcp_userspace_pm_lookup_addr_by_id(msk, id);
- spin_unlock_bh(&msk->pm.lock);
- if (match) {
- *flags = match->flags;
- *ifindex = match->ifindex;
- }
-
- return 0;
-}
-
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
@@ -352,8 +335,9 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct mptcp_pm_addr_entry local = { 0 };
+ struct mptcp_pm_addr_entry entry = { 0 };
struct mptcp_addr_info addr_r;
+ struct mptcp_pm_local local;
struct mptcp_sock *msk;
int err = -EINVAL;
struct sock *sk;
@@ -379,18 +363,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- err = mptcp_pm_parse_entry(laddr, info, true, &local);
+ err = mptcp_pm_parse_entry(laddr, info, true, &entry);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
goto create_err;
}
- if (local.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ if (entry.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
GENL_SET_ERR_MSG(info, "invalid addr flags");
err = -EINVAL;
goto create_err;
}
- local.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ entry.flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
@@ -398,27 +382,29 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}
- if (!mptcp_pm_addr_families_match(sk, &local.addr, &addr_r)) {
+ if (!mptcp_pm_addr_families_match(sk, &entry.addr, &addr_r)) {
GENL_SET_ERR_MSG(info, "families mismatch");
err = -EINVAL;
goto create_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &entry, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
}
- lock_sock(sk);
-
- err = __mptcp_subflow_connect(sk, &local.addr, &addr_r);
+ local.addr = entry.addr;
+ local.flags = entry.flags;
+ local.ifindex = entry.ifindex;
+ lock_sock(sk);
+ err = __mptcp_subflow_connect(sk, &local, &addr_r);
release_sock(sk);
spin_lock_bh(&msk->pm.lock);
if (err)
- mptcp_userspace_pm_delete_local_addr(msk, &local);
+ mptcp_userspace_pm_delete_local_addr(msk, &entry);
else
msk->pm.subflows++;
spin_unlock_bh(&msk->pm.lock);
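The pm_userspace.c change above makes the caller responsible for everything __mptcp_subflow_connect() used to look up by ID: the new struct mptcp_pm_local (added to protocol.h further down) carries the address together with its flags and ifindex, so the *_get_flags_and_ifindex_by_id() helpers deleted earlier are no longer needed. A minimal sketch of the convention; pm_entry_to_local() is a hypothetical name for the copy done inline in the hunk:

/* hypothetical helper mirroring the inline copy above */
static void pm_entry_to_local(const struct mptcp_pm_addr_entry *entry,
			      struct mptcp_pm_local *local)
{
	local->addr    = entry->addr;
	local->flags   = entry->flags;
	local->ifindex = entry->ifindex;
}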
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 37ebcb7640eb..c2317919fc14 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3717,13 +3717,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
return 0;
}
-static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- subflow->request_mptcp = 0;
- __mptcp_do_fallback(msk);
-}
-
static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct mptcp_subflow_context *subflow;
@@ -3744,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
mptcp_subflow_early_fallback(msk, subflow);
#endif
- if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
- mptcp_subflow_early_fallback(msk, subflow);
+ if (subflow->request_mptcp) {
+ if (mptcp_active_should_disable(sk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
+ mptcp_subflow_early_fallback(msk, subflow);
+ } else if (mptcp_token_new_connect(ssk) < 0) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
+ mptcp_subflow_early_fallback(msk, subflow);
+ }
}
WRITE_ONCE(msk->write_seq, subflow->idsn);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3b22313d1b86..74417aae08d0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -236,6 +236,12 @@ struct mptcp_pm_data {
struct mptcp_rm_list rm_list_rx;
};
+struct mptcp_pm_local {
+ struct mptcp_addr_info addr;
+ u8 flags;
+ int ifindex;
+};
+
struct mptcp_pm_addr_entry {
struct list_head list;
struct mptcp_addr_info addr;
@@ -525,7 +531,8 @@ struct mptcp_subflow_context {
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
close_event_done : 1, /* has done the post-closed part */
- __unused : 9;
+ mpc_drop : 1, /* the MPC option has been dropped in a rtx */
+ __unused : 8;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -691,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
+
+void mptcp_active_disable(struct sock *sk);
+bool mptcp_active_should_disable(struct sock *ssk);
+void mptcp_active_enable(struct sock *sk);
+
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
@@ -719,7 +731,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr);
/* called with sk socket lock held */
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
struct socket **new_sock);
@@ -1014,14 +1026,6 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex);
-int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
- unsigned int id,
- u8 *flags, int *ifindex);
int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info);
int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info);
@@ -1154,7 +1158,6 @@ static inline void mptcp_pm_close_subflow(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
-void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
@@ -1218,6 +1221,14 @@ static inline void mptcp_do_fallback(struct sock *ssk)
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
+static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow)
+{
+ pr_fallback(msk);
+ subflow->request_mptcp = 0;
+ __mptcp_do_fallback(msk);
+}
+
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
{
struct mptcp_ext *mpext;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 064ab3235893..1040b3b9696b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_capable = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
+ mptcp_active_enable(parent);
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
@@ -591,6 +592,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
}
} else if (mptcp_check_fallback(sk)) {
+ /* It looks like MPTCP is blocked, while TCP is not */
+ if (subflow->mpc_drop)
+ mptcp_active_disable(parent);
fallback:
mptcp_propagate_state(parent, sk, subflow, NULL);
}
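Together with the mptcp_connect() hunk earlier, the three helpers declared in protocol.h sketch out a back-off mechanism for paths that silently strip MPTCP options; their bodies are not part of the hunks shown here, so the following is only a reading of the contract implied by the call sites:

/* Sketch of the implied contract (bodies live outside these hunks):
 *
 *   mptcp_active_enable(sk)         - an MPC handshake completed:
 *                                     clear any blackhole suspicion.
 *   mptcp_active_disable(sk)        - the MPC option was dropped on a
 *                                     retransmit (subflow->mpc_drop)
 *                                     yet plain TCP got through:
 *                                     back off from MPTCP for a while.
 *   mptcp_active_should_disable(sk) - checked before sending a new
 *                                     MPC SYN; true while the
 *                                     back-off period is running.
 */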
@@ -1565,28 +1569,31 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
#endif
}
-int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
+int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
+ int local_id = local->addr.id;
struct sockaddr_storage addr;
int remote_id = remote->id;
- int local_id = loc->id;
int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
- int ifindex;
- u8 flags;
+ /* The userspace PM sent the request too early? */
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, loc->family, &sf);
- if (err)
+ err = mptcp_subflow_create_socket(sk, local->addr.family, &sf);
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCREATSKERR);
+ pr_debug("msk=%p local=%d remote=%d create sock error: %d\n",
+ msk, local_id, remote_id, err);
goto err_out;
+ }
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1594,26 +1601,39 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
get_random_bytes(&subflow->local_nonce, sizeof(u32));
} while (!subflow->local_nonce);
- if (local_id)
+ /* if 'IPADDRANY', the ID will be set later, after the routing */
+ if (local->addr.family == AF_INET) {
+ if (!local->addr.addr.s_addr)
+ local_id = -1;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ } else if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_any(&local->addr.addr6))
+ local_id = -1;
+#endif
+ }
+
+ if (local_id >= 0)
subflow_set_local_id(subflow, local_id);
- mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
- &flags, &ifindex);
subflow->remote_key_valid = 1;
subflow->remote_key = READ_ONCE(msk->remote_key);
subflow->local_key = READ_ONCE(msk->local_key);
subflow->token = msk->token;
- mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
+ mptcp_info2sockaddr(&local->addr, &addr, ssk->sk_family);
addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
- ssk->sk_bound_dev_if = ifindex;
+ ssk->sk_bound_dev_if = local->ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
- if (err)
+ if (err) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXBINDERR);
+ pr_debug("msk=%p local=%d remote=%d bind error: %d\n",
+ msk, local_id, remote_id, err);
goto failed;
+ }
mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d\n", msk,
@@ -1621,15 +1641,21 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_token = remote_token;
WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
- subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ subflow->request_bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
sock_hold(ssk);
list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
- if (err && err != -EINPROGRESS)
+ if (err && err != -EINPROGRESS) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTXCONNECTERR);
+ pr_debug("msk=%p local=%d remote=%d connect error: %d\n",
+ msk, local_id, remote_id, err);
goto failed_unlink;
+ }
+
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNTX);
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
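With the added error branches, every active join attempt in __mptcp_subflow_connect() now bumps exactly one MIB counter; the names below are the ones used in the hunks above:

/* MPTCP_MIB_JOINSYNTX            - join SYN handed to the stack
 * MPTCP_MIB_JOINSYNTXCREATSKERR  - subflow socket creation failed
 * MPTCP_MIB_JOINSYNTXBINDERR     - bind() to the local address failed
 * MPTCP_MIB_JOINSYNTXCONNECTERR  - connect() failed synchronously
 */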
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b00fc285b334..b9f551f02c81 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -655,11 +655,9 @@ void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
const struct nf_hook_entries *e)
{
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
int ret;
- INIT_LIST_HEAD(&sublist);
-
list_for_each_entry_safe(skb, next, head, list) {
skb_list_del_init(skb);
ret = nf_hook_slow(skb, state, e, 0);
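LIST_HEAD() from <linux/list.h> declares and initializes the list head in one statement, which is all the removed two-step pattern was doing:

LIST_HEAD(sublist);	/* expands to:
			 * struct list_head sublist =
			 *	LIST_HEAD_INIT(sublist);
			 */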
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 34ba14e59e95..4890af4dc263 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -522,11 +522,10 @@ unsigned int nf_conncount_count(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_conncount_count);
-struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
- unsigned int keylen)
+struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
{
struct nf_conncount_data *data;
- int ret, i;
+ int i;
if (keylen % sizeof(u32) ||
keylen / sizeof(u32) > MAX_KEYLEN ||
@@ -539,12 +538,6 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
if (!data)
return ERR_PTR(-ENOMEM);
- ret = nf_ct_netns_get(net, family);
- if (ret < 0) {
- kfree(data);
- return ERR_PTR(ret);
- }
-
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
data->root[i] = RB_ROOT;
@@ -581,13 +574,11 @@ static void destroy_tree(struct rb_root *r)
}
}
-void nf_conncount_destroy(struct net *net, unsigned int family,
- struct nf_conncount_data *data)
+void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data)
{
unsigned int i;
cancel_work_sync(&data->gc_work);
- nf_ct_netns_put(net, family);
for (i = 0; i < ARRAY_SIZE(data->root); ++i)
destroy_tree(&data->root[i]);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9384426ddc06..d3cb53b008f5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1722,7 +1722,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
hash);
if (IS_ERR(ct))
- return (struct nf_conntrack_tuple_hash *)ct;
+ return ERR_CAST(ct);
if (!nf_ct_add_synproxy(ct, tmpl)) {
nf_conntrack_free(ct);
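ERR_CAST() from <linux/err.h> exists for exactly this pattern: forwarding an ERR_PTR() under a different pointer type without a bare cast that could also hide a genuine type bug:

if (IS_ERR(ct))
	return ERR_CAST(ct);	/* same bits as the old cast, but
				 * self-documenting: an error pointer,
				 * not a tuple hash, is returned */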
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4cbf71d0786b..123e2e933e9b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1579,9 +1579,6 @@ static int ctnetlink_flush_conntrack(struct net *net,
};
if (ctnetlink_needs_filter(family, cda)) {
- if (cda[CTA_FILTER])
- return -EOPNOTSUPP;
-
filter = ctnetlink_alloc_filter(cda, family);
if (IS_ERR(filter))
return PTR_ERR(filter);
@@ -1610,14 +1607,14 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
if (err < 0)
return err;
- if (cda[CTA_TUPLE_ORIG])
+ if (cda[CTA_TUPLE_ORIG] && !cda[CTA_FILTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
family, &zone);
- else if (cda[CTA_TUPLE_REPLY])
+ else if (cda[CTA_TUPLE_REPLY] && !cda[CTA_FILTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
family, &zone);
else {
- u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC;
+ u8 u3 = info->nfmsg->version || cda[CTA_FILTER] ? family : AF_UNSPEC;
return ctnetlink_flush_conntrack(info->net, cda,
NETLINK_CB(skb).portid,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 016c816d91cb..6d8da6dddf99 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1104,7 +1104,7 @@ int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
if (!nat_proto_net->nat_hook_ops) {
WARN_ON(nat_proto_net->users != 0);
- nat_ops = kmemdup(orig_nat_ops, sizeof(*orig_nat_ops) * ops_count, GFP_KERNEL);
+ nat_ops = kmemdup_array(orig_nat_ops, ops_count, sizeof(*orig_nat_ops), GFP_KERNEL);
if (!nat_ops) {
mutex_unlock(&nf_nat_proto_mutex);
return -ENOMEM;
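kmemdup_array() is the overflow-checked form of kmemdup(src, count * size, gfp): the multiplication saturates instead of wrapping, so an oversized count cannot produce a too-small allocation. Usage as in the hunk:

nat_ops = kmemdup_array(orig_nat_ops, ops_count,
			sizeof(*orig_nat_ops), GFP_KERNEL);
if (!nat_ops)
	return -ENOMEM;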
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0a2f79346958..57259b5f3ef5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -146,6 +146,8 @@ static void nft_ctx_init(struct nft_ctx *ctx,
ctx->report = nlmsg_report(nlh);
ctx->flags = nlh->nlmsg_flags;
ctx->seq = nlh->nlmsg_seq;
+
+ bitmap_zero(ctx->reg_inited, NFT_REG32_NUM);
}
static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
@@ -393,6 +395,7 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans_binding *binding;
+ struct nft_trans_set *trans_set;
list_add_tail(&trans->list, &nft_net->commit_list);
@@ -402,9 +405,13 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
+ trans_set = nft_trans_container_set(trans);
+
if (!nft_trans_set_update(trans) &&
nft_set_is_anonymous(nft_trans_set(trans)))
list_add_tail(&binding->binding_list, &nft_net->binding_list);
+
+ list_add_tail(&trans_set->list_trans_newset, &nft_net->commit_set_list);
break;
case NFT_MSG_NEWCHAIN:
if (!nft_trans_chain_update(trans) &&
@@ -611,6 +618,7 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
trans_set = nft_trans_container_set(trans);
INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list);
+ INIT_LIST_HEAD(&trans_set->list_trans_newset);
if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
@@ -3878,7 +3886,6 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *r
int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
{
struct nft_expr *expr, *last;
- const struct nft_data *data;
struct nft_rule *rule;
int err;
@@ -3899,7 +3906,7 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
/* This may call nft_chain_validate() recursively,
* callers that do so must increment ctx->level.
*/
- err = expr->ops->validate(ctx, expr, &data);
+ err = expr->ops->validate(ctx, expr);
if (err < 0)
return err;
}
@@ -4485,17 +4492,16 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net,
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
- struct nft_trans *trans;
+ struct nft_trans_set *trans;
- list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->msg_type == NFT_MSG_NEWSET) {
- struct nft_set *set = nft_trans_set(trans);
+ /* it's likely the ID we need is at the tail, not at the start */
+ list_for_each_entry_reverse(trans, &nft_net->commit_set_list, list_trans_newset) {
+ struct nft_set *set = trans->set;
- if (id == nft_trans_set_id(trans) &&
- set->table == table &&
- nft_active_genmask(set, genmask))
- return set;
- }
+ if (id == trans->set_id &&
+ set->table == table &&
+ nft_active_genmask(set, genmask))
+ return set;
}
return ERR_PTR(-ENOENT);
}
@@ -4587,7 +4593,7 @@ int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
return -ERANGE;
ms *= NSEC_PER_MSEC;
- *result = nsecs_to_jiffies64(ms);
+ *result = nsecs_to_jiffies64(ms) ? : !!ms;
return 0;
}
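The `?:` fallback guards against sub-jiffy rounding: with HZ=100, a 1 ms timeout converts to 0 jiffies, and a zero timeout now means "never expires" for set elements. Any nonzero input therefore rounds up to at least one jiffy:

/* e.g. HZ == 100, ms == 1 ms (already scaled to nanoseconds):
 * nsecs_to_jiffies64(ms) == 0, !!ms == 1  ->  *result = 1
 */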
@@ -5688,12 +5694,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
.align = __alignof__(u8),
},
[NFT_SET_EXT_TIMEOUT] = {
- .len = sizeof(u64),
- .align = __alignof__(u64),
- },
- [NFT_SET_EXT_EXPIRATION] = {
- .len = sizeof(u64),
- .align = __alignof__(u64),
+ .len = sizeof(struct nft_timeout),
+ .align = __alignof__(struct nft_timeout),
},
[NFT_SET_EXT_USERDATA] = {
.len = sizeof(struct nft_userdata),
@@ -5812,25 +5814,32 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
- nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
- nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+ u64 timeout = READ_ONCE(nft_set_ext_timeout(ext)->timeout);
+ u64 set_timeout = READ_ONCE(set->timeout);
+ __be64 msecs = 0;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- u64 expires, now = get_jiffies_64();
+ if (set_timeout != timeout) {
+ msecs = nf_jiffies64_to_msecs(timeout);
+ if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, msecs,
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ }
- expires = *nft_set_ext_expiration(ext);
- if (time_before64(now, expires))
- expires -= now;
- else
- expires = 0;
+ if (timeout > 0) {
+ u64 expires, now = get_jiffies_64();
- if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
- nf_jiffies64_to_msecs(expires),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
+ expires = READ_ONCE(nft_set_ext_timeout(ext)->expiration);
+ if (time_before64(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ nf_jiffies64_to_msecs(expires),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ }
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
@@ -6493,13 +6502,14 @@ struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set,
nft_set_ext_data(ext), data, set->dlen) < 0)
goto err_ext_check;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- *nft_set_ext_expiration(ext) = get_jiffies_64() + expiration;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+ nft_set_ext_timeout(ext)->timeout = timeout;
+
if (expiration == 0)
- *nft_set_ext_expiration(ext) += timeout;
+ expiration = timeout;
+
+ nft_set_ext_timeout(ext)->expiration = get_jiffies_64() + expiration;
}
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
- *nft_set_ext_timeout(ext) = timeout;
return elem;
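Dump and init now share a single NFT_SET_EXT_TIMEOUT extension carrying both values the two old u64 extensions held separately. The layout assumed by the nft_set_ext_timeout(ext)->timeout / ->expiration accessors used above would be:

/* assumed layout, consistent with the accessors in these hunks */
struct nft_timeout {
	u64 timeout;	/* relative jiffies64; 0 == never expires */
	u64 expiration;	/* absolute jiffies64 deadline */
};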
@@ -6842,6 +6852,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
+ u8 update_flags;
u64 expiration;
u64 timeout;
int err, i;
@@ -6910,17 +6921,23 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
- timeout = READ_ONCE(set->timeout);
+ timeout = set->timeout;
}
expiration = 0;
if (nla[NFTA_SET_ELEM_EXPIRATION] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (timeout == 0)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_ELEM_EXPIRATION],
&expiration);
if (err)
return err;
+
+ if (expiration > timeout)
+ return -ERANGE;
}
if (nla[NFTA_SET_ELEM_EXPR]) {
@@ -7006,16 +7023,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_parse_key_end;
}
- if (timeout > 0) {
- err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (set->flags & NFT_SET_TIMEOUT) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
-
- if (timeout != READ_ONCE(set->timeout)) {
- err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
- if (err < 0)
- goto err_parse_key_end;
- }
}
if (num_exprs) {
@@ -7153,8 +7164,30 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF) &&
*nft_set_ext_obj(ext) != *nft_set_ext_obj(ext2)))
goto err_element_clash;
- else if (!(nlmsg_flags & NLM_F_EXCL))
+ else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
+ if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
+ update_flags = 0;
+ if (timeout != nft_set_ext_timeout(ext2)->timeout) {
+ nft_trans_elem_timeout(trans) = timeout;
+ if (expiration == 0)
+ expiration = timeout;
+
+ update_flags |= NFT_TRANS_UPD_TIMEOUT;
+ }
+ if (expiration) {
+ nft_trans_elem_expiration(trans) = expiration;
+ update_flags |= NFT_TRANS_UPD_EXPIRATION;
+ }
+
+ if (update_flags) {
+ nft_trans_elem_priv(trans) = elem_priv;
+ nft_trans_elem_update_flags(trans) = update_flags;
+ nft_trans_commit_list_add_tail(ctx->net, trans);
+ goto err_elem_free;
+ }
+ }
+ }
} else if (err == -ENOTEMPTY) {
/* ENOTEMPTY reports overlapping between this element
* and an existing one.
@@ -10447,6 +10480,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
+ list_del(&nft_trans_container_set(trans)->list_trans_newset);
if (nft_trans_set_update(trans)) {
struct nft_set *set = nft_trans_set(trans);
@@ -10478,7 +10512,22 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = nft_trans_container_elem(trans);
- nft_setelem_activate(net, te->set, te->elem_priv);
+ if (te->update_flags) {
+ const struct nft_set_ext *ext =
+ nft_set_elem_ext(te->set, te->elem_priv);
+
+ if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) {
+ WRITE_ONCE(nft_set_ext_timeout(ext)->timeout,
+ te->timeout);
+ }
+ if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) {
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration,
+ get_jiffies_64() + te->expiration);
+ }
+ } else {
+ nft_setelem_activate(net, te->set, te->elem_priv);
+ }
+
nf_tables_setelem_notify(&ctx, te->set,
te->elem_priv,
NFT_MSG_NEWSETELEM);
@@ -10755,6 +10804,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
+ list_del(&nft_trans_container_set(trans)->list_trans_newset);
if (nft_trans_set_update(trans)) {
nft_trans_destroy(trans);
break;
@@ -10777,12 +10827,16 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- if (nft_trans_elem_set_bound(trans)) {
+ if (nft_trans_elem_update_flags(trans) ||
+ nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
te = nft_trans_container_elem(trans);
- nft_setelem_remove(net, te->set, te->elem_priv);
+ if (!te->set->ops->abort ||
+ nft_setelem_is_catchall(te->set, te->elem_priv))
+ nft_setelem_remove(net, te->set, te->elem_priv);
+
if (!nft_setelem_is_catchall(te->set, te->elem_priv))
atomic_dec(&te->set->nelems);
@@ -10850,6 +10904,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
}
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list));
+
nft_set_abort_update(&set_update_list);
synchronize_rcu();
@@ -11026,10 +11082,11 @@ static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
return 0;
}
-int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+int nft_parse_register_load(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *sreg, u32 len)
{
- u32 reg;
- int err;
+ int err, invalid_reg;
+ u32 reg, next_register;
err = nft_parse_register(attr, &reg);
if (err < 0)
@@ -11039,11 +11096,36 @@ int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
if (err < 0)
return err;
+ next_register = DIV_ROUND_UP(len, NFT_REG32_SIZE) + reg;
+
+ /* Can't happen: nft_validate_register_load() should have failed */
+ if (WARN_ON_ONCE(next_register > NFT_REG32_NUM))
+ return -EINVAL;
+
+ /* find first register that did not see an earlier store. */
+ invalid_reg = find_next_zero_bit(ctx->reg_inited, NFT_REG32_NUM, reg);
+
+ /* invalid register within the range that we're loading from? */
+ if (invalid_reg < next_register)
+ return -ENODATA;
+
*sreg = reg;
return 0;
}
EXPORT_SYMBOL_GPL(nft_parse_register_load);
+static void nft_saw_register_store(const struct nft_ctx *__ctx,
+ int reg, unsigned int len)
+{
+ unsigned int registers = DIV_ROUND_UP(len, NFT_REG32_SIZE);
+ struct nft_ctx *ctx = (struct nft_ctx *)__ctx;
+
+ if (WARN_ON_ONCE(len == 0 || reg < 0))
+ return;
+
+ bitmap_set(ctx->reg_inited, reg, registers);
+}
+
static int nft_validate_register_store(const struct nft_ctx *ctx,
enum nft_registers reg,
const struct nft_data *data,
@@ -11065,7 +11147,7 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
return err;
}
- return 0;
+ break;
default:
if (type != NFT_DATA_VALUE)
return -EINVAL;
@@ -11078,8 +11160,11 @@ static int nft_validate_register_store(const struct nft_ctx *ctx,
sizeof_field(struct nft_regs, data))
return -ERANGE;
- return 0;
+ break;
}
+
+ nft_saw_register_store(ctx, reg, len);
+ return 0;
}
int nft_parse_register_store(const struct nft_ctx *ctx,
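The effect: loading from a register that no earlier expression in the same rule has stored to now fails at rule-creation time with -ENODATA, instead of silently reading stale contents at runtime. Worked example for a 16-byte load at NFT_REG32_02 when only NFT_REG32_01 was written:

/* reg = 2, len = 16:
 *   next_register = 2 + DIV_ROUND_UP(16, 4) = 6
 *   reg_inited    = { bit 1 }    (one earlier 4-byte store)
 *   find_next_zero_bit(reg_inited, NFT_REG32_NUM, 2) == 2
 *   2 < 6  ->  -ENODATA
 */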
@@ -11519,6 +11604,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->commit_set_list);
INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
@@ -11549,6 +11635,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
gc_seq = nft_gc_seq_begin(nft_net);
WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_set_list));
if (!list_empty(&nft_net->module_list))
nf_tables_module_autoload_cleanup(net);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index a48d5f0e2f3e..75598520b0fa 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -256,7 +256,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
const struct net *net = nft_net(pkt);
const struct nft_expr *expr, *last;
const struct nft_rule_dp *rule;
- struct nft_regs regs = {};
+ struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
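Dropping the `= {}` initializer is the payoff of the register tracking above: nft_parse_register_load() now proves at rule-load time that no register is read before it is written, so the datapath no longer has to zero the whole register file for every packet entering a base chain:

struct nft_regs regs;	/* assumed layout: union of the verdict and
			 * u32 data[NFT_REG32_NUM]; previously cleared
			 * per packet, now left uninitialized on
			 * purpose */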
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 932b3ddb34f1..7784ec094097 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -402,27 +402,27 @@ replay_abort:
{
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
}
if (!ss->valid_genid || !ss->commit || !ss->abort) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
if (!try_module_get(ss->owner)) {
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
if (!ss->valid_genid(net, genid)) {
module_put(ss->owner);
nfnl_unlock(subsys_id);
netlink_ack(oskb, nlh, -ERESTART, NULL);
- return kfree_skb(skb);
+ return consume_skb(skb);
}
nfnl_unlock(subsys_id);
@@ -567,7 +567,7 @@ done:
if (status & NFNL_BATCH_REPLAY) {
ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD);
nfnl_err_reset(&err_list);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
@@ -593,7 +593,7 @@ done:
err = ss->abort(net, oskb, abort_action);
if (err == -EAGAIN) {
nfnl_err_reset(&err_list);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
status |= NFNL_BATCH_FAILURE;
goto replay_abort;
@@ -601,7 +601,7 @@ done:
}
nfnl_err_deliver(&err_list, oskb);
- kfree_skb(skb);
+ consume_skb(skb);
module_put(ss->owner);
}
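kfree_skb() marks the buffer as dropped, firing the kfree_skb tracepoint that drop-monitor tooling watches; consume_skb() frees it as the normal end of a successfully handled buffer. Freeing the duplicated batch skb after processing is the latter, so the switch removes false positives from drop accounting (done_skb and bad_skb below are placeholder names):

consume_skb(done_skb);	/* handled fine; no drop event emitted */
kfree_skb(bad_skb);	/* a genuine drop; visible to drop monitor */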
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index e0716da256bf..d2773ce9b585 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -540,6 +540,14 @@ nla_put_failure:
return -1;
}
+static int nf_queue_checksum_help(struct sk_buff *entskb)
+{
+ if (skb_csum_is_sctp(entskb))
+ return skb_crc32c_csum_help(entskb);
+
+ return skb_checksum_help(entskb);
+}
+
static struct sk_buff *
nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nf_queue_entry *entry,
@@ -602,7 +610,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
case NFQNL_COPY_PACKET:
if (!(queue->flags & NFQA_CFG_F_GSO) &&
entskb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(entskb))
+ nf_queue_checksum_help(entskb))
return NULL;
data_len = READ_ONCE(queue->copy_range);
@@ -1014,7 +1022,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
break;
}
- if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
+ if (!skb_is_gso(skb) || ((queue->flags & NFQA_CFG_F_GSO) && !skb_is_gso_sctp(skb)))
return __nfqnl_enqueue_packet(net, queue, entry);
nf_bridge_adjust_skb_data(skb);
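Two SCTP fixes travel together here: packets with CHECKSUM_PARTIAL need the CRC32c helper rather than the Internet-checksum one, and GSO SCTP aggregates are now always software-segmented before enqueue even when userspace set NFQA_CFG_F_GSO, since the queue path cannot carry them unsegmented. The gate condenses to:

/* enqueue unsegmented only if not GSO at all, or if userspace
 * accepts GSO and the aggregate is not SCTP: */
if (!skb_is_gso(skb) ||
    ((queue->flags & NFQA_CFG_F_GSO) && !skb_is_gso_sctp(skb)))
	return __nfqnl_enqueue_packet(net, queue, entry);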
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index ca857afbf061..7de95674fd8c 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -171,7 +171,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg,
priv->len);
if (err < 0)
return err;
@@ -365,7 +365,7 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
int err;
- err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG], &priv->sreg,
sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index f6e791a68101..2f82a444d21b 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -139,7 +139,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_BYTEORDER_SREG], &priv->sreg,
priv->len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index cd4652259095..2605f43737bc 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -83,7 +83,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -222,7 +222,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -323,7 +323,7 @@ static int nft_cmp16_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index d3d11dede545..52cdfee17f73 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -350,8 +350,7 @@ nla_put_failure:
}
static int nft_target_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct xt_target *target = expr->ops->data;
unsigned int hook_mask = 0;
@@ -611,8 +610,7 @@ static int nft_match_large_dump(struct sk_buff *skb,
}
static int nft_match_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct xt_match *match = expr->ops->data;
unsigned int hook_mask = 0;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index eab0dc66bee6..cc7325329496 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -8,7 +8,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -17,6 +17,11 @@
#include <net/netfilter/nf_tables_offload.h>
struct nft_counter {
+ u64_stats_t bytes;
+ u64_stats_t packets;
+};
+
+struct nft_counter_tot {
s64 bytes;
s64 packets;
};
@@ -25,25 +30,24 @@ struct nft_counter_percpu_priv {
struct nft_counter __percpu *counter;
};
-static DEFINE_PER_CPU(seqcount_t, nft_counter_seq);
+static DEFINE_PER_CPU(struct u64_stats_sync, nft_counter_sync);
static inline void nft_counter_do_eval(struct nft_counter_percpu_priv *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
-
- write_seqcount_begin(myseq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- this_cpu->bytes += pkt->skb->len;
- this_cpu->packets++;
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->bytes, pkt->skb->len);
+ u64_stats_inc(&this_cpu->packets);
+ u64_stats_update_end(nft_sync);
- write_seqcount_end(myseq);
local_bh_enable();
}
@@ -66,17 +70,16 @@ static int nft_counter_do_init(const struct nlattr * const tb[],
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu = raw_cpu_ptr(cpu_stats);
if (tb[NFTA_COUNTER_PACKETS]) {
- this_cpu->packets =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ u64_stats_set(&this_cpu->packets,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])));
}
if (tb[NFTA_COUNTER_BYTES]) {
- this_cpu->bytes =
- be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ u64_stats_set(&this_cpu->bytes,
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])));
}
- preempt_enable();
+
priv->counter = cpu_stats;
return 0;
}
@@ -104,40 +107,41 @@ static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
}
static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
+
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, -total->packets);
+ u64_stats_add(&this_cpu->bytes, -total->bytes);
+ u64_stats_update_end(nft_sync);
- write_seqcount_begin(myseq);
- this_cpu->packets -= total->packets;
- this_cpu->bytes -= total->bytes;
- write_seqcount_end(myseq);
local_bh_enable();
}
static void nft_counter_fetch(struct nft_counter_percpu_priv *priv,
- struct nft_counter *total)
+ struct nft_counter_tot *total)
{
struct nft_counter *this_cpu;
- const seqcount_t *myseq;
u64 bytes, packets;
unsigned int seq;
int cpu;
memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- myseq = per_cpu_ptr(&nft_counter_seq, cpu);
+ struct u64_stats_sync *nft_sync = per_cpu_ptr(&nft_counter_sync, cpu);
+
this_cpu = per_cpu_ptr(priv->counter, cpu);
do {
- seq = read_seqcount_begin(myseq);
- bytes = this_cpu->bytes;
- packets = this_cpu->packets;
- } while (read_seqcount_retry(myseq, seq));
+ seq = u64_stats_fetch_begin(nft_sync);
+ bytes = u64_stats_read(&this_cpu->bytes);
+ packets = u64_stats_read(&this_cpu->packets);
+ } while (u64_stats_fetch_retry(nft_sync, seq));
total->bytes += bytes;
total->packets += packets;
@@ -148,7 +152,7 @@ static int nft_counter_do_dump(struct sk_buff *skb,
struct nft_counter_percpu_priv *priv,
bool reset)
{
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -237,7 +241,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g
struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
struct nft_counter __percpu *cpu_stats;
struct nft_counter *this_cpu;
- struct nft_counter total;
+ struct nft_counter_tot total;
nft_counter_fetch(priv, &total);
@@ -245,11 +249,9 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src, g
if (cpu_stats == NULL)
return -ENOMEM;
- preempt_disable();
- this_cpu = this_cpu_ptr(cpu_stats);
- this_cpu->packets = total.packets;
- this_cpu->bytes = total.bytes;
- preempt_enable();
+ this_cpu = raw_cpu_ptr(cpu_stats);
+ u64_stats_set(&this_cpu->packets, total.packets);
+ u64_stats_set(&this_cpu->bytes, total.bytes);
priv_clone->counter = cpu_stats;
return 0;
@@ -267,17 +269,17 @@ static void nft_counter_offload_stats(struct nft_expr *expr,
const struct flow_stats *stats)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct u64_stats_sync *nft_sync;
struct nft_counter *this_cpu;
- seqcount_t *myseq;
local_bh_disable();
this_cpu = this_cpu_ptr(priv->counter);
- myseq = this_cpu_ptr(&nft_counter_seq);
+ nft_sync = this_cpu_ptr(&nft_counter_sync);
- write_seqcount_begin(myseq);
- this_cpu->packets += stats->pkts;
- this_cpu->bytes += stats->bytes;
- write_seqcount_end(myseq);
+ u64_stats_update_begin(nft_sync);
+ u64_stats_add(&this_cpu->packets, stats->pkts);
+ u64_stats_add(&this_cpu->bytes, stats->bytes);
+ u64_stats_update_end(nft_sync);
local_bh_enable();
}
@@ -286,7 +288,7 @@ void nft_counter_init_seqcount(void)
int cpu;
for_each_possible_cpu(cpu)
- seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
+ u64_stats_init(per_cpu_ptr(&nft_counter_sync, cpu));
}
struct nft_expr_type nft_counter_type;
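The counter conversion trades a hand-rolled per-CPU seqcount for the stock u64_stats API, which compiles to plain loads and stores on 64-bit (where they are already atomic) and falls back to a seqcount only on 32-bit. A minimal self-contained sketch of the pattern, not kernel-verbatim (nft_counter keeps its sync object in a separate per-CPU variable rather than embedding it):

#include <linux/u64_stats_sync.h>

struct pcpu_ctr {
	u64_stats_t bytes;
	struct u64_stats_sync sync;
};

static void ctr_add(struct pcpu_ctr *c, unsigned int len)
{
	u64_stats_update_begin(&c->sync);	/* writer side */
	u64_stats_add(&c->bytes, len);
	u64_stats_update_end(&c->sync);
}

static u64 ctr_read(struct pcpu_ctr *c)
{
	unsigned int seq;
	u64 v;

	do {				/* reader retries on 32-bit if a
					 * writer interleaved */
		seq = u64_stats_fetch_begin(&c->sync);
		v = u64_stats_read(&c->bytes);
	} while (u64_stats_fetch_retry(&c->sync, seq));

	return v;
}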
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 452ed94c3a4d..67a41cd2baaf 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -606,7 +606,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_CT_SREG], &priv->sreg, len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index e5739a59ebf1..0573f96ce079 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -40,7 +40,7 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_DEV] == NULL)
return -EINVAL;
- return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
+ return nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
sizeof(int));
}
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index b4ada3ab2167..88922e0e8e83 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -56,7 +56,7 @@ static struct nft_elem_priv *nft_dynset_new(struct nft_set *set,
if (!atomic_add_unless(&set->nelems, 1, set->size))
return NULL;
- timeout = priv->timeout ? : set->timeout;
+ timeout = priv->timeout ? : READ_ONCE(set->timeout);
elem_priv = nft_set_elem_init(set, &priv->tmpl,
&regs->data[priv->sreg_key], NULL,
&regs->data[priv->sreg_data],
@@ -94,9 +94,10 @@ void nft_dynset_eval(const struct nft_expr *expr,
if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
expr, regs, &ext)) {
if (priv->op == NFT_DYNSET_OP_UPDATE &&
- nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
- timeout = priv->timeout ? : set->timeout;
- *nft_set_ext_expiration(ext) = get_jiffies_64() + timeout;
+ nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0) {
+ timeout = priv->timeout ? : READ_ONCE(set->timeout);
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + timeout);
}
nft_set_elem_update_expr(ext, regs, pkt);
@@ -215,7 +216,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return err;
}
- err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+ err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
set->klen);
if (err < 0)
return err;
@@ -226,7 +227,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (set->dtype == NFT_DATA_VERDICT)
return -EOPNOTSUPP;
- err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA],
+ err = nft_parse_register_load(ctx, tb[NFTA_DYNSET_SREG_DATA],
&priv->sreg_data, set->dlen);
if (err < 0)
return err;
@@ -312,12 +313,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (priv->num_exprs)
nft_dynset_ext_add_expr(priv);
- if (set->flags & NFT_SET_TIMEOUT) {
- if (timeout || set->timeout) {
- nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
- nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
- }
- }
+ if (set->flags & NFT_SET_TIMEOUT &&
+ (timeout || READ_ONCE(set->timeout)))
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
priv->timeout = timeout;
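On the datapath side, the expiration refresh is skipped for elements whose timeout is 0 (never expires), and both fields go through READ_ONCE()/WRITE_ONCE() since the commit path shown earlier may rewrite them concurrently. Condensed from the eval hunk above:

if (READ_ONCE(nft_set_ext_timeout(ext)->timeout) != 0)
	WRITE_ONCE(nft_set_ext_timeout(ext)->expiration,
		   get_jiffies_64() + timeout);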
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 6eb571d0c3fd..6bfd33516241 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -588,7 +588,7 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
priv->flags = flags;
priv->op = op;
- return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
+ return nft_parse_register_load(ctx, tb[NFTA_EXTHDR_SREG], &priv->sreg,
priv->len);
}
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index b58f62195ff3..96e02a83c045 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -26,8 +26,7 @@ const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
};
EXPORT_SYMBOL(nft_fib_policy);
-int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_fib *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab9576098701..2f732fae5a83 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -9,6 +9,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
#include <net/ip.h> /* for ipv4 options. */
+#include <net/inet_dscp.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -235,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
- fl.u.ip4.flowi4_tos = RT_TOS(ip_hdr(pkt->skb)->tos);
+ fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK;
fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
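RT_TOS() kept only the four legacy TOS bits; INET_DSCP_MASK preserves all six DSCP bits for the route lookup, in line with the wider DSCP-based routing work in this cycle:

/* tos = DSCP (6 bits) | ECN (2 bits)
 * RT_TOS(tos)          == tos & 0x1e   (legacy TOS subset)
 * tos & INET_DSCP_MASK == tos & 0xfc   (full DSCP, ECN cleared)
 */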
@@ -380,8 +381,7 @@ out:
}
static int nft_flow_offload_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
unsigned int hook_mask = (1 << NF_INET_FORWARD);
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 358e742afad7..152a9fb4d23a 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -52,7 +52,7 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_FWD_SREG_DEV] == NULL)
return -EINVAL;
- return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
sizeof(int));
}
@@ -178,12 +178,12 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ err = nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
sizeof(int));
if (err < 0)
return err;
- return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
+ return nft_parse_register_load(ctx, tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
addr_len);
}
@@ -204,8 +204,7 @@ nla_put_failure:
}
static int nft_fwd_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) |
(1 << NF_NETDEV_EGRESS));
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 868d68302d22..5d034bbb6913 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -92,7 +92,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_HASH_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index ac2422c215e5..02ee5fb69871 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -244,8 +244,7 @@ nla_put_failure:
}
static int nft_immediate_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **d)
+ const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
struct nft_ctx *pctx = (struct nft_ctx *)ctx;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index f3080fa1b226..63ef832b8aa7 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -113,7 +113,7 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
- err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_LOOKUP_SREG], &priv->sreg,
set->klen);
if (err < 0)
return err;
@@ -206,8 +206,7 @@ nla_put_failure:
}
static int nft_lookup_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **d)
+ const struct nft_expr *expr)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
struct nft_set_iter iter;
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 8a14aaca93bb..868bd4d73555 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -27,8 +27,7 @@ static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
};
static int nft_masq_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
int err;
@@ -52,13 +51,13 @@ static int nft_masq_init(const struct nft_ctx *ctx,
priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_MASQ_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 9139ce38ea7b..8c8eb14d647b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -581,8 +581,7 @@ static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx)
}
static int nft_meta_get_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -600,8 +599,7 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
}
int nft_meta_set_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
@@ -657,7 +655,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
}
priv->len = len;
- err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
+ err = nft_parse_register_load(ctx, tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 808f5802c270..6e21f72c5b57 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -137,8 +137,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
};
static int nft_nat_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
struct nft_nat *priv = nft_expr_priv(expr);
int err;
@@ -214,13 +213,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MIN],
&priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_ADDR_MAX],
&priv->sreg_addr_max,
alen);
if (err < 0)
@@ -234,13 +233,13 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
plen = sizeof_field(struct nf_nat_range, min_proto.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_NAT_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 509011b1ef59..09da7a3f9f96 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -143,7 +143,7 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
set->klen);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 7fec57ff736f..1c0b493ef0a9 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -108,8 +108,7 @@ nla_put_failure:
}
static int nft_osf_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
unsigned int hooks;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 50429cbd42da..330609a76fb2 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -981,7 +981,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
}
priv->csum_type = csum_type;
- return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ return nft_parse_register_load(ctx, tb[NFTA_PAYLOAD_SREG], &priv->sreg,
priv->len);
}
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index b2b8127c8d43..344fe311878f 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -69,8 +69,7 @@ static void nft_queue_sreg_eval(const struct nft_expr *expr,
}
static int nft_queue_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
@@ -136,7 +135,7 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx,
struct nft_queue *priv = nft_expr_priv(expr);
int err;
- err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM],
+ err = nft_parse_register_load(ctx, tb[NFTA_QUEUE_SREG_QNUM],
&priv->sreg_qnum, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 51ae64cd268f..ea382f7bbd78 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -83,7 +83,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
goto err2;
}
- err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg,
+ err = nft_parse_register_load(ctx, tb[NFTA_RANGE_SREG], &priv->sreg,
desc_from.len);
if (err < 0)
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index a58bd8d291ff..95eedad85c83 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -27,8 +27,7 @@ static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
};
static int nft_redir_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
int err;
@@ -51,13 +50,13 @@ static int nft_redir_init(const struct nft_ctx *ctx,
plen = sizeof_field(struct nf_nat_range, min_proto.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
- err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MIN],
&priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
- err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX],
+ err = nft_parse_register_load(ctx, tb[NFTA_REDIR_REG_PROTO_MAX],
&priv->sreg_proto_max,
plen);
if (err < 0)
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index ed2e668474d6..196a92c7ea09 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -24,8 +24,7 @@ const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
EXPORT_SYMBOL_GPL(nft_reject_policy);
int nft_reject_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 973fa31a9dd6..49020e67304a 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -61,8 +61,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
}
static int nft_reject_inet_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c
index 7865cd8b11bb..2558ce1505d9 100644
--- a/net/netfilter/nft_reject_netdev.c
+++ b/net/netfilter/nft_reject_netdev.c
@@ -145,8 +145,7 @@ out:
}
static int nft_reject_netdev_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
}
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 14d88394bcb7..dc50b9a5bd68 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -160,8 +160,7 @@ nla_put_failure:
return -1;
}
-static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_rt *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 12cdff640492..f5da0c1775f2 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -61,8 +61,8 @@ static noinline int nft_socket_cgroup_subtree_level(void)
struct cgroup *cgrp = cgroup_get_from_path("/");
int level;
- if (!cgrp)
- return -ENOENT;
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
level = cgrp->level;
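cgroup_get_from_path() returns an ERR_PTR() on failure, never NULL, so the old `!cgrp` test could not fire and the subsequent cgrp->level dereference would oops on an error pointer. The IS_ERR()/PTR_ERR() pair is the matching idiom:

struct cgroup *cgrp = cgroup_get_from_path("/");

if (IS_ERR(cgrp))	/* never NULL on failure */
	return PTR_ERR(cgrp);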
@@ -275,8 +275,7 @@ static bool nft_socket_reduce(struct nft_regs_track *track,
}
static int nft_socket_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 1d737f89dfc1..5d3e51825985 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -248,8 +248,7 @@ static void nft_synproxy_eval(const struct nft_expr *expr,
}
static int nft_synproxy_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 71412adb73d4..50481280abd2 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -254,14 +254,14 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TPROXY_REG_ADDR]) {
- err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR],
+ err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_ADDR],
&priv->sreg_addr, alen);
if (err < 0)
return err;
}
if (tb[NFTA_TPROXY_REG_PORT]) {
- err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT],
+ err = nft_parse_register_load(ctx, tb[NFTA_TPROXY_REG_PORT],
&priv->sreg_port, sizeof(u16));
if (err < 0)
return err;
@@ -313,8 +313,7 @@ static int nft_tproxy_dump(struct sk_buff *skb,
}
static int nft_tproxy_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_expr *expr)
{
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 1c866757db55..8a07b46cc8fb 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -229,8 +229,7 @@ static int nft_xfrm_get_dump(struct sk_buff *skb,
return 0;
}
-static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 5d04ef80a61d..0e762277bcf8 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -86,6 +86,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
struct xt_connlimit_info *info = par->matchinfo;
unsigned int keylen;
+ int ret;
keylen = sizeof(u32);
if (par->family == NFPROTO_IPV6)
@@ -93,8 +94,17 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
else
keylen += sizeof(struct in_addr);
+ ret = nf_ct_netns_get(par->net, par->family);
+ if (ret < 0) {
+ pr_info_ratelimited("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
+ }
+
/* init private data */
- info->data = nf_conncount_init(par->net, par->family, keylen);
+ info->data = nf_conncount_init(par->net, keylen);
+ if (IS_ERR(info->data))
+ nf_ct_netns_put(par->net, par->family);
return PTR_ERR_OR_ZERO(info->data);
}
@@ -103,7 +113,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_connlimit_info *info = par->matchinfo;
- nf_conncount_destroy(par->net, par->family, info->data);
+ nf_conncount_destroy(par->net, info->data);
+ nf_ct_netns_put(par->net, par->family);
}
static struct xt_match connlimit_mt_reg __read_mostly = {
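
Two API changes combine in this hunk: nf_conncount_init()/nf_conncount_destroy() drop the family argument, and the match now pins conntrack itself. A condensed sketch of the resulting pairing (info and keylen as in the hunk above):

	ret = nf_ct_netns_get(par->net, par->family);
	if (ret < 0)
		return ret;

	info->data = nf_conncount_init(par->net, keylen);
	if (IS_ERR(info->data))
		nf_ct_netns_put(par->net, par->family);	/* drop the reference on failure */
	return PTR_ERR_OR_ZERO(info->data);

	/* ...and the destroy path releases both: */
	nf_conncount_destroy(par->net, info->data);
	nf_ct_netns_put(par->net, par->family);
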
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 9751e29d4bbb..5b0e4e62ab8b 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -41,7 +41,6 @@ struct netlink_sock {
struct netlink_callback cb;
struct mutex nl_cb_mutex;
- struct mutex *dump_cb_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
int (*netlink_bind)(struct net *net, int group);
void (*netlink_unbind)(struct net *net, int group);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index bd2b17b219ae..2b5e246b8d9a 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -189,7 +189,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
}
nr_node->callsign = *nr;
- strcpy(nr_node->mnemonic, mnemonic);
+ strscpy(nr_node->mnemonic, mnemonic);
nr_node->which = 0;
nr_node->count = 1;
@@ -214,7 +214,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_node_lock(nr_node);
if (quality != 0)
- strcpy(nr_node->mnemonic, mnemonic);
+ strscpy(nr_node->mnemonic, mnemonic);
for (found = 0, i = 0; i < nr_node->count; i++) {
if (nr_node->routes[i].neighbour == nr_neigh) {
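
These conversions use the two-argument strscpy() form, where the destination must be a fixed-size array so the size is inferred at compile time; unlike strcpy() it always NUL-terminates and reports truncation. A sketch of the semantics (buffer name and size hypothetical):

	char mnemonic[7];
	ssize_t n;

	n = strscpy(mnemonic, src);	/* same as strscpy(mnemonic, src, sizeof(mnemonic)) */
	if (n == -E2BIG)
		pr_debug("source truncated to %zu chars plus NUL\n", sizeof(mnemonic) - 1);
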
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 101f9a23792c..16e260014684 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -237,14 +237,18 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
const struct ovs_action_push_vlan *vlan)
{
+ int err;
+
if (skb_vlan_tag_present(skb)) {
invalidate_flow_key(key);
} else {
key->eth.vlan.tci = vlan->vlan_tci;
key->eth.vlan.tpid = vlan->vlan_tpid;
}
- return skb_vlan_push(skb, vlan->vlan_tpid,
- ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ err = skb_vlan_push(skb, vlan->vlan_tpid,
+ ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
+ skb_reset_mac_len(skb);
+ return err;
}
/* 'src' is already properly masked. */
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 8eb1d644b741..3bb4810234aa 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1368,11 +1368,8 @@ bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
attr == OVS_KEY_ATTR_CT_MARK)
return true;
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
- attr == OVS_KEY_ATTR_CT_LABELS) {
- struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-
- return ovs_net->xt_label;
- }
+ attr == OVS_KEY_ATTR_CT_LABELS)
+ return true;
return false;
}
@@ -1381,6 +1378,7 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
+ unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
struct ovs_conntrack_info ct_info;
const char *helper = NULL;
u16 family;
@@ -1409,6 +1407,12 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
return -ENOMEM;
}
+ if (nf_connlabels_get(net, n_bits - 1)) {
+ nf_ct_tmpl_free(ct_info.ct);
+ OVS_NLERR(log, "Failed to set connlabel length");
+ return -EOPNOTSUPP;
+ }
+
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
@@ -1577,6 +1581,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
if (ct_info->ct) {
if (ct_info->timeout[0])
nf_ct_destroy_timeout(ct_info->ct);
+ nf_connlabels_put(nf_ct_net(ct_info->ct));
nf_ct_tmpl_free(ct_info->ct);
}
}
@@ -1603,8 +1608,7 @@ static int ovs_ct_limit_init(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&ovs_net->ct_limit_info->limits[i]);
- ovs_net->ct_limit_info->data =
- nf_conncount_init(net, NFPROTO_INET, sizeof(u32));
+ ovs_net->ct_limit_info->data = nf_conncount_init(net, sizeof(u32));
if (IS_ERR(ovs_net->ct_limit_info->data)) {
err = PTR_ERR(ovs_net->ct_limit_info->data);
@@ -1621,7 +1625,7 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
const struct ovs_ct_limit_info *info = ovs_net->ct_limit_info;
int i;
- nf_conncount_destroy(net, NFPROTO_INET, info->data);
+ nf_conncount_destroy(net, info->data);
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
@@ -2002,17 +2006,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
int ovs_ct_init(struct net *net)
{
- unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
- if (nf_connlabels_get(net, n_bits - 1)) {
- ovs_net->xt_label = false;
- OVS_NLERR(true, "Failed to set connlabel length");
- } else {
- ovs_net->xt_label = true;
- }
-
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
return ovs_ct_limit_init(net, ovs_net);
#else
return 0;
@@ -2021,12 +2017,9 @@ int ovs_ct_init(struct net *net)
void ovs_ct_exit(struct net *net)
{
+#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
-#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
ovs_ct_limit_exit(net, ovs_net);
#endif
-
- if (ovs_net->xt_label)
- nf_connlabels_put(net);
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 9ca6231ea647..365b9bb7f546 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -160,9 +160,6 @@ struct ovs_net {
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
struct ovs_ct_limit_info *ct_limit_info;
#endif
-
- /* Module reference for configuring conntrack. */
- bool xt_label;
};
/**
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c92bdc4dfe19..729ef582a3a8 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2491,7 +2491,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
- return (void *)acts;
+ return ERR_CAST(acts);
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 4b33133cbdff..5858d65ea1a9 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -102,19 +102,20 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
+ netdev->lltx = true;
netdev->needs_free_netdev = true;
netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
- netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
- NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL;
+ netdev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+ NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL;
netdev->vlan_features = netdev->features;
netdev->hw_enc_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+ netdev->hw_features = netdev->features;
eth_hw_addr_random(netdev);
}
@@ -148,7 +149,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
/* Restrict bridge port to current netns. */
if (vport->port_no == OVSP_LOCAL)
- vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ vport->dev->netns_local = true;
rtnl_lock();
err = register_netdevice(vport->dev);
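
Both hunks above follow a tree-wide 6.12 conversion: NETIF_F_LLTX and NETIF_F_NETNS_LOCAL stop being netdev feature bits and become dedicated net_device fields. The pattern, as seen here:

	/* before: netdev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL; */
	netdev->lltx = true;		/* lockless TX, no longer a user-visible feature */
	netdev->netns_local = true;	/* refuse moves to another network namespace */
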
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4a364cdd445e..a705ec214254 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2216,7 +2216,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
}
}
- snaplen = skb->len;
+ snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb);
res = run_filter(skb, sk, snaplen);
if (!res)
@@ -2336,7 +2336,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
}
- snaplen = skb->len;
+ snaplen = skb_frags_readable(skb) ? skb->len : skb_headlen(skb);
res = run_filter(skb, sk, snaplen);
if (!res)
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 75cd696963b2..f007730aa2bb 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -26,3 +26,12 @@ config RDS_DEBUG
bool "RDS debugging messages"
depends on RDS
default n
+
+config GCOV_PROFILE_RDS
+ bool "Enable GCOV profiling on RDS"
+ depends on GCOV_KERNEL
+ help
+ Enable GCOV profiling on RDS for checking which functions/lines
+ are executed.
+
+ If unsure, say N.
diff --git a/net/rds/Makefile b/net/rds/Makefile
index 8fdc118e2927..3af1ca1d965c 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -15,3 +15,8 @@ rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
tcp_send.o tcp_stats.o
ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG
+
+# for GCOV coverage profiling
+ifdef CONFIG_GCOV_PROFILE_RDS
+GCOV_PROFILE := y
+endif
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 2ba71102b1f1..8ef3178ed4d6 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -369,9 +369,6 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
void rds_ib_conn_free(void *arg);
int rds_ib_conn_path_connect(struct rds_conn_path *cp);
void rds_ib_conn_path_shutdown(struct rds_conn_path *cp);
-void rds_ib_state_change(struct sock *sk);
-int rds_ib_listen_init(void);
-void rds_ib_listen_stop(void);
__printf(2, 3)
void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -402,7 +399,6 @@ void rds_ib_inc_free(struct rds_incoming *inc);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
struct rds_ib_ack_state *state);
-void rds_ib_recv_tasklet_fn(unsigned long data);
void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 7a5367628c05..13a5126bc36e 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -539,18 +539,14 @@ bool rfkill_get_global_sw_state(const enum rfkill_type type)
#endif
bool rfkill_set_hw_state_reason(struct rfkill *rfkill,
- bool blocked, unsigned long reason)
+ bool blocked,
+ enum rfkill_hard_block_reasons reason)
{
unsigned long flags;
bool ret, prev;
BUG_ON(!rfkill);
- if (WARN(reason & ~(RFKILL_HARD_BLOCK_SIGNAL |
- RFKILL_HARD_BLOCK_NOT_OWNER),
- "hw_state reason not supported: 0x%lx", reason))
- return rfkill_blocked(rfkill);
-
spin_lock_irqsave(&rfkill->lock, flags);
prev = !!(rfkill->hard_block_reasons & reason);
if (blocked) {
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 84529886c2e6..c268c2b011f4 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -3,6 +3,7 @@
* Copyright (c) 2011, NVIDIA Corporation.
*/
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -72,6 +73,20 @@ static int rfkill_gpio_acpi_probe(struct device *dev,
return devm_acpi_dev_add_driver_gpios(dev, acpi_rfkill_default_gpios);
}
+/* List of DMI matches for devices on which rfkill-gpio should not load,
+ * to avoid firmware bugs.
+ */
+static const struct dmi_system_id rfkill_gpio_deny_table[] = {
+ {
+ /* Lenovo Yoga Tab 3 Pro YT3-X90, bogus "BCM4752" device in DSDT */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"),
+ },
+ },
+ { }
+};
+
static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill;
@@ -81,6 +96,9 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
const char *type_name;
int ret;
+ if (dmi_check_system(rfkill_gpio_deny_table))
+ return -ENODEV;
+
rfkill = devm_kzalloc(&pdev->dev, sizeof(*rfkill), GFP_KERNEL);
if (!rfkill)
return -ENOMEM;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 08de24658f4f..80d682f89b23 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -856,7 +856,6 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
struct rxrpc_connection *conn,
struct sockaddr_rxrpc *peer_srx,
struct sk_buff *skb);
-void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
/*
@@ -969,7 +968,6 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
-void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
void rxrpc_discard_expired_client_conns(struct rxrpc_local *local);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 3ba8e7e739b5..2197eb625658 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -44,8 +44,6 @@ static DEFINE_MUTEX(zones_mutex);
struct zones_ht_key {
struct net *net;
u16 zone;
- /* Note : pad[] must be the last field. */
- u8 pad[];
};
struct tcf_ct_flow_table {
@@ -62,7 +60,7 @@ struct tcf_ct_flow_table {
static const struct rhashtable_params zones_params = {
.head_offset = offsetof(struct tcf_ct_flow_table, node),
.key_offset = offsetof(struct tcf_ct_flow_table, key),
- .key_len = offsetof(struct zones_ht_key, pad),
+ .key_len = offsetofend(struct zones_ht_key, zone),
.automatic_shrinking = true,
};
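
The pad[] marker existed only so key_len could name the end of the hashed members; offsetofend() expresses that directly. For reference, its definition (paraphrased from linux/stddef.h, so treat as a sketch):

	#define offsetofend(TYPE, MEMBER) \
		(offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
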
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 22f4b1e8ade9..383bf18b6862 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -96,6 +96,7 @@ out:
if (skb_at_tc_ingress(skb))
skb_pull_rcsum(skb, skb->mac_len);
+ skb_reset_mac_len(skb);
return action;
drop:
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index d2f49db70523..f2f9b75008bb 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -361,8 +361,24 @@ static const u8 besteffort[] = {
static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
static const u8 bulk_order[] = {1, 0, 2, 3};
+/* There is a big difference in timing between the accurate values placed in the
+ * cache and the approximations given by a single Newton step for small count
+ * values, particularly when stepping from count 1 to 2 or vice versa. Hence,
+ * these values are calculated using eight Newton steps, using the
+ * implementation below. Above 16, a single Newton step gives sufficient
+ * accuracy in either direction, given the precision stored.
+ *
+ * The magnitude of the error when stepping up to count 2 is such as to give the
+ * value that *should* have been produced at count 4.
+ */
+
#define REC_INV_SQRT_CACHE (16)
-static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
+static const u32 inv_sqrt_cache[REC_INV_SQRT_CACHE] = {
+ ~0, ~0, 3037000500, 2479700525,
+ 2147483647, 1920767767, 1753413056, 1623345051,
+ 1518500250, 1431655765, 1358187914, 1294981364,
+ 1239850263, 1191209601, 1147878294, 1108955788
+};
/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
* new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
@@ -388,47 +404,14 @@ static void cobalt_newton_step(struct cobalt_vars *vars)
static void cobalt_invsqrt(struct cobalt_vars *vars)
{
if (vars->count < REC_INV_SQRT_CACHE)
- vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
+ vars->rec_inv_sqrt = inv_sqrt_cache[vars->count];
else
cobalt_newton_step(vars);
}
-/* There is a big difference in timing between the accurate values placed in
- * the cache and the approximations given by a single Newton step for small
- * count values, particularly when stepping from count 1 to 2 or vice versa.
- * Above 16, a single Newton step gives sufficient accuracy in either
- * direction, given the precision stored.
- *
- * The magnitude of the error when stepping up to count 2 is such as to give
- * the value that *should* have been produced at count 4.
- */
-
-static void cobalt_cache_init(void)
-{
- struct cobalt_vars v;
-
- memset(&v, 0, sizeof(v));
- v.rec_inv_sqrt = ~0U;
- cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
-
- for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
- cobalt_newton_step(&v);
-
- cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
- }
-}
-
static void cobalt_vars_init(struct cobalt_vars *vars)
{
memset(vars, 0, sizeof(*vars));
-
- if (!cobalt_rec_inv_sqrt_cache[0]) {
- cobalt_cache_init();
- cobalt_rec_inv_sqrt_cache[0] = ~0;
- }
}
/* CoDel control_law is t + interval/sqrt(count)
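
As a cross-check on the new table, here is a self-contained userspace sketch of the same 0.32 fixed-point Newton step that cobalt_newton_step() implements, applied eight times per count as the comment above describes; it should land on (or very near) the committed constants:

	#include <stdint.h>
	#include <stdio.h>

	/* 0.32 fixed point: new = (inv / 2) * (3 - count * inv^2) */
	static uint32_t newton_step(uint32_t invsqrt, uint32_t count)
	{
		uint64_t invsqrt2 = ((uint64_t)invsqrt * invsqrt) >> 32;
		uint64_t val = (3ULL << 32) - (uint64_t)count * invsqrt2;

		val >>= 2;	/* avoid overflow in the multiply below */
		return (uint32_t)((val * invsqrt) >> (32 - 2 + 1));
	}

	int main(void)
	{
		uint32_t v = ~0U;	/* counts 0 and 1: rec_inv_sqrt is 1.0 */

		for (uint32_t count = 1; count < 16; count++) {
			for (int i = 0; i < 8; i++)	/* eight steps, per the comment */
				v = newton_step(v, count);
			printf("count %2u: %u\n", count, v);
		}
		return 0;
	}
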
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index cc2df9f8c14a..8498d0606b24 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1952,7 +1952,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- rcu_assign_pointer(q->admin_sched, new_admin);
+ /* Not going to race against advance_sched(), but still */
+ admin = rcu_replace_pointer(q->admin_sched, new_admin,
+ lockdep_rtnl_is_held());
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
} else {
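
rcu_replace_pointer() both fetches the old admin schedule under the writer-side condition and publishes the new one, so the call_rcu() above always frees exactly the value it replaced. Roughly its definition (paraphrased from rcupdate.h; treat as a sketch):

	#define rcu_replace_pointer(rcu_ptr, ptr, c)				\
	({									\
		typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c));	\
		rcu_assign_pointer((rcu_ptr), (ptr));				\
		__tmp;								\
	})
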
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5a7436a13b74..39ca5403d4d7 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -44,6 +44,7 @@
#include <net/inet_common.h>
#include <net/inet_ecn.h>
#include <net/udp_tunnel.h>
+#include <net/inet_dscp.h>
#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024)
@@ -435,7 +436,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = RT_TOS(tos);
+ fl4->flowi4_tos = tos & INET_DSCP_MASK;
fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8e3093938cd2..0316217b7687 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1466,10 +1466,6 @@ connect_abort:
static int smc_connect_check_aclc(struct smc_init_info *ini,
struct smc_clc_msg_accept_confirm *aclc)
{
- if (aclc->hdr.typev1 != SMC_TYPE_R &&
- aclc->hdr.typev1 != SMC_TYPE_D)
- return SMC_CLC_DECL_MODEUNSUPP;
-
if (aclc->hdr.version >= SMC_V2) {
if ((aclc->hdr.typev1 == SMC_TYPE_R &&
!smcr_indicated(ini->smc_type_v2)) ||
@@ -1523,10 +1519,6 @@ static int __smc_connect(struct smc_sock *smc)
ini->smcd_version &= ~SMC_V1;
ini->smcr_version = 0;
ini->smc_type_v1 = SMC_TYPE_N;
- if (!ini->smcd_version) {
- rc = SMC_CLC_DECL_GETVLANERR;
- goto fallback;
- }
}
rc = smc_find_proposal_devices(smc, ini);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 467effb50cd6..5625fda2960b 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -145,6 +145,8 @@ struct smc_clc_v2_extension {
);
u8 user_eids[][SMC_MAX_EID_LEN];
};
+static_assert(offsetof(struct smc_clc_v2_extension, user_eids) == sizeof(struct smc_clc_v2_extension_fixed),
+ "struct member likely outside of struct_group_tagged()");
struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message */
__be32 outgoing_subnet; /* subnet mask */
@@ -169,6 +171,8 @@ struct smc_clc_smcd_v2_extension {
);
struct smc_clc_smcd_gid_chid gidchid[];
};
+static_assert(offsetof(struct smc_clc_smcd_v2_extension, gidchid) == sizeof(struct smc_clc_smcd_v2_extension_fixed),
+ "struct member likely outside of struct_group_tagged()");
struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
struct smc_clc_msg_hdr hdr;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3b95828d9976..4e694860ece4 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -221,6 +221,35 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
write_unlock_bh(&lgr->conns_lock);
}
+static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
+ bool is_rmb,
+ struct list_head *buf_list,
+ struct smc_buf_desc *buf_desc)
+{
+ list_add(&buf_desc->list, buf_list);
+ if (is_rmb) {
+ lgr->alloc_rmbs += buf_desc->len;
+ lgr->alloc_rmbs +=
+ lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
+ } else {
+ lgr->alloc_sndbufs += buf_desc->len;
+ }
+}
+
+static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
+ bool is_rmb,
+ struct smc_buf_desc *buf_desc)
+{
+ list_del(&buf_desc->list);
+ if (is_rmb) {
+ lgr->alloc_rmbs -= buf_desc->len;
+ lgr->alloc_rmbs -=
+ lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
+ } else {
+ lgr->alloc_sndbufs -= buf_desc->len;
+ }
+}
+
int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
@@ -363,6 +392,10 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr,
smc_target[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_R_SNDBUF_ALLOC, lgr->alloc_sndbufs))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_R_RMB_ALLOC, lgr->alloc_rmbs))
+ goto errattr;
if (lgr->smc_version > SMC_V1) {
v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
if (!v2_attrs)
@@ -541,6 +574,10 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
goto errattr;
if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_D_SNDBUF_ALLOC, lgr->alloc_sndbufs))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_LGR_D_DMB_ALLOC, lgr->alloc_rmbs))
+ goto errattr;
memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
smc_pnet[SMC_MAX_PNETID_LEN] = 0;
if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
@@ -1138,7 +1175,7 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock;
down_write(lock);
- list_del(&buf_desc->list);
+ smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc);
@@ -1166,22 +1203,30 @@ static void smcd_buf_detach(struct smc_connection *conn)
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
+ struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ bool is_smcd = lgr->is_smcd;
+ int bufsize;
+
if (conn->sndbuf_desc) {
- if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
+ bufsize = conn->sndbuf_desc->len;
+ if (!is_smcd && conn->sndbuf_desc->is_vm) {
smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
} else {
- memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len);
+ memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->sndbuf_desc->used, 0);
}
+ SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
}
if (conn->rmb_desc) {
- if (!lgr->is_smcd) {
+ bufsize = conn->rmb_desc->len;
+ if (!is_smcd) {
smcr_buf_unuse(conn->rmb_desc, true, lgr);
} else {
- memzero_explicit(conn->rmb_desc->cpu_addr,
- conn->rmb_desc->len + sizeof(struct smcd_cdc_msg));
+ bufsize += sizeof(struct smcd_cdc_msg);
+ memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
WRITE_ONCE(conn->rmb_desc->used, 0);
}
+ SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
}
}
@@ -1377,7 +1422,7 @@ static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
buf_list = &lgr->sndbufs[i];
list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
list) {
- list_del(&buf_desc->list);
+ smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
smc_buf_free(lgr, is_rmb, buf_desc);
}
}
@@ -2250,7 +2295,7 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
- bool is_rmb, int bufsize)
+ int bufsize)
{
struct smc_buf_desc *buf_desc;
@@ -2390,7 +2435,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
- SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
break; /* found reusable slot */
}
@@ -2398,7 +2443,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_smcd)
buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
else
- buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
+ buf_desc = smcr_new_buf_create(lgr, bufsize);
if (PTR_ERR(buf_desc) == -ENOMEM)
break;
@@ -2411,10 +2456,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
}
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
- SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
buf_desc->used = 1;
down_write(lock);
- list_add(&buf_desc->list, buf_list);
+ smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
up_write(lock);
break; /* found */
}
@@ -2496,7 +2541,8 @@ create_rmb:
rc = __smc_buf_create(smc, is_smcd, true);
if (rc && smc->conn.sndbuf_desc) {
down_write(&smc->conn.lgr->sndbufs_lock);
- list_del(&smc->conn.sndbuf_desc->list);
+ smc_lgr_buf_list_del(smc->conn.lgr, false,
+ smc->conn.sndbuf_desc);
up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index d93cf51dbd7c..0db4e5f79ac4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -281,6 +281,8 @@ struct smc_link_group {
struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
struct rw_semaphore rmbs_lock; /* protects rx buffers */
+ u64 alloc_sndbufs; /* stats of tx buffers */
+ u64 alloc_rmbs; /* stats of rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
index 6dd4292dae56..04dc6808d2e1 100644
--- a/net/smc/smc_loopback.h
+++ b/net/smc/smc_loopback.h
@@ -15,7 +15,6 @@
#define _SMC_LOOPBACK_H
#include <linux/device.h>
-#include <linux/err.h>
#include <net/smc.h>
#if IS_ENABLED(CONFIG_SMC_LO)
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 2adb92b8c469..1dd362326c0a 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -887,9 +887,6 @@ int smc_pnet_net_init(struct net *net)
smc_pnet_create_pnetids_list(net);
- /* disable handshake limitation by default */
- net->smc.limit_smc_hs = 0;
-
return 0;
}
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
index ca14c0f3a07d..e71b17d1e21c 100644
--- a/net/smc/smc_stats.c
+++ b/net/smc/smc_stats.c
@@ -218,6 +218,12 @@ static int smc_nl_fill_stats_tech_data(struct sk_buff *skb,
smc_tech->tx_bytes,
SMC_NLA_STATS_PAD))
goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_STATS_T_RX_RMB_USAGE,
+ smc_tech->rx_rmbuse))
+ goto errattr;
+ if (nla_put_uint(skb, SMC_NLA_STATS_T_TX_RMB_USAGE,
+ smc_tech->tx_rmbuse))
+ goto errattr;
if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT,
smc_tech->rx_cnt,
SMC_NLA_STATS_PAD))
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
index e19177ce4092..571f9d9e7814 100644
--- a/net/smc/smc_stats.h
+++ b/net/smc/smc_stats.h
@@ -79,6 +79,8 @@ struct smc_stats_tech {
u64 tx_bytes;
u64 rx_cnt;
u64 tx_cnt;
+ u64 rx_rmbuse;
+ u64 tx_rmbuse;
};
struct smc_stats {
@@ -135,38 +137,46 @@ do { \
} \
while (0)
-#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _len) \
+#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _is_add, _len) \
do { \
+ typeof(_smc_stats) stats = (_smc_stats); \
+ typeof(_is_add) is_a = (_is_add); \
typeof(_len) _l = (_len); \
typeof(_tech) t = (_tech); \
int _pos; \
int m = SMC_BUF_MAX - 1; \
if (_l <= 0) \
break; \
- _pos = fls((_l - 1) >> 13); \
- _pos = (_pos <= m) ? _pos : m; \
- this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \
+ if (is_a) { \
+ _pos = fls((_l - 1) >> 13); \
+ _pos = (_pos <= m) ? _pos : m; \
+ this_cpu_inc((*stats).smc[t].k ## _rmbsize.buf[_pos]); \
+ this_cpu_add((*stats).smc[t].k ## _rmbuse, _l); \
+ } else { \
+ this_cpu_sub((*stats).smc[t].k ## _rmbuse, _l); \
+ } \
} \
while (0)
#define SMC_STAT_RMB_SUB(_smc_stats, type, t, key) \
this_cpu_inc((*(_smc_stats)).smc[t].rmb ## _ ## key.type ## _cnt)
-#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _len) \
+#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _is_add, _len) \
do { \
struct net *_net = sock_net(&(_smc)->sk); \
struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
+ typeof(_is_add) is_add = (_is_add); \
typeof(_is_smcd) is_d = (_is_smcd); \
typeof(_is_rx) is_r = (_is_rx); \
typeof(_len) l = (_len); \
if ((is_d) && (is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, is_add, l); \
if ((is_d) && !(is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, is_add, l); \
if (!(is_d) && (is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, is_add, l); \
if (!(is_d) && !(is_r)) \
- SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, l); \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, is_add, l); \
} \
while (0)
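
With the extra _is_add flag, one macro now maintains both the size histogram (bumped only on allocation) and a running rx/tx buffer-usage counter (adjusted in both directions). Usage, as seen in the smc_core.c hunks above:

	SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);	/* buffer allocated or reused */
	SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);	/* RMB released in smc_buf_unuse() */
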
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 13f2bc092db1..2fab6456f765 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -90,6 +90,15 @@ static struct ctl_table smc_table[] = {
.extra1 = &conns_per_lgr_min,
.extra2 = &conns_per_lgr_max,
},
+ {
+ .procname = "limit_smc_hs",
+ .data = &init_net.smc.limit_smc_hs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
int __net_init smc_sysctl_net_init(struct net *net)
@@ -121,6 +130,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
+ /* disable handshake limitation by default */
+ net->smc.limit_smc_hs = 0;
return 0;
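
The handshake-limitation switch thus moves from a hard-coded per-netns default in smc_pnet to a proper sysctl, clamped to 0/1. A hypothetical userspace toggle, writing what should be /proc/sys/net/smc/limit_smc_hs (path inferred from the smc sysctl table; treat it as an assumption):

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* enable SMC handshake limitation in the current netns */
		int fd = open("/proc/sys/net/smc/limit_smc_hs", O_WRONLY);

		if (fd < 0)
			return 1;
		write(fd, "1", 1);
		close(fd);
		return 0;
	}
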
diff --git a/net/socket.c b/net/socket.c
index 0a2bd22ec105..8d8b84fa404a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -946,11 +946,17 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
memset(&tss, 0, sizeof(tss));
tsflags = READ_ONCE(sk->sk_tsflags);
- if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE ||
+ skb_is_err_queue(skb) ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
empty = 0;
if (shhwtstamps &&
- (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE &&
+ (tsflags & SOF_TIMESTAMPING_RX_HARDWARE ||
+ skb_is_err_queue(skb) ||
+ !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
if_index = 0;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
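
These hunks wire up the new SOF_TIMESTAMPING_OPT_RX_FILTER flag: when it is set, a software or hardware receive timestamp is only reported if the matching RX flag was requested, while error-queue (TX) packets remain exempt. A minimal userspace sketch of opting in (fd is assumed to be an open socket):

	#include <sys/socket.h>
	#include <linux/net_tstamp.h>

	static int enable_tx_stamps_only(int fd)
	{
		/* report software TX stamps, suppress unrequested RX stamps */
		int val = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_TX_SOFTWARE |
			  SOF_TIMESTAMPING_OPT_RX_FILTER;

		return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
	}
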
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 593846d25214..114fef65f92e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -320,8 +320,8 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
{
struct tipc_msg *hdr, *_hdr;
struct sk_buff_head tmpq;
+ u16 cong_link_cnt = 0;
struct sk_buff *_skb;
- u16 cong_link_cnt;
int rc = 0;
/* Is the cluster supporting the new capabilities? */
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 5a526ebafeb4..ae1ddbf71853 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -163,8 +163,12 @@ static int bearer_name_validate(const char *name,
/* return bearer name components, if necessary */
if (name_parts) {
- strcpy(name_parts->media_name, media_name);
- strcpy(name_parts->if_name, if_name);
+ if (strscpy(name_parts->media_name, media_name,
+ TIPC_MAX_MEDIA_NAME) < 0)
+ return 0;
+ if (strscpy(name_parts->if_name, if_name,
+ TIPC_MAX_IF_NAME) < 0)
+ return 0;
}
return 1;
}
@@ -322,7 +326,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
if (!b)
return -ENOMEM;
- strcpy(b->name, name);
+ strscpy(b->name, name);
b->media = m;
res = m->enable_media(net, b, attr);
if (res) {
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 77a3d016cade..e2f19627e43d 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -149,7 +149,7 @@ static int dom_size(int peers)
while ((i * i) < peers)
i++;
- return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
+ return min(i, MAX_MON_DOMAIN);
}
static void map_set(u64 *up_map, int i, unsigned int v)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 1a0cd06f0eae..65dcbb54f55d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1009,12 +1009,11 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
struct tipc_member *mbr = NULL;
struct net *net = sock_net(sk);
u32 node, port, exclude;
- struct list_head dsts;
+ LIST_HEAD(dsts);
int lookups = 0;
int dstcnt, rc;
bool cong;
- INIT_LIST_HEAD(&dsts);
ua->sa.type = msg_nametype(hdr);
ua->scope = msg_lookup_scope(hdr);
@@ -1161,10 +1160,9 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct net *net = sock_net(sk);
- struct list_head dsts;
u32 dstcnt, exclude;
+ LIST_HEAD(dsts);
- INIT_LIST_HEAD(&dsts);
ua->sa.type = msg_nametype(hdr);
ua->scope = msg_lookup_scope(hdr);
exclude = tipc_group_exclude(grp);
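
LIST_HEAD(dsts) declares and initializes the on-stack list head in a single statement, removing the separate INIT_LIST_HEAD() call; its definition is simply (from memory, as a sketch):

	#define LIST_HEAD_INIT(name) { &(name), &(name) }

	#define LIST_HEAD(name) \
		struct list_head name = LIST_HEAD_INIT(name)
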
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 305a412785f5..bbf26cc4f6ee 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1201,7 +1201,7 @@ trim_sgl:
if (!num_async) {
goto send_end;
- } else if (num_zc) {
+ } else if (num_zc || eor) {
int err;
/* Wait for pending encryptions to get completed */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0be0dcb07f7b..001ccc55ef0f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -693,10 +693,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_state_unlock(sk);
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
- }
+ u->oob_skb = NULL;
#endif
wake_up_interruptible_all(&u->peer_wait);
@@ -2226,13 +2223,9 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
}
maybe_add_creds(skb, sock, other);
- skb_get(skb);
-
scm_stat_add(other, skb);
spin_lock(&other->sk_receive_queue.lock);
- if (ousk->oob_skb)
- consume_skb(ousk->oob_skb);
WRITE_ONCE(ousk->oob_skb, skb);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
@@ -2640,8 +2633,6 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
WRITE_ONCE(u->oob_skb, NULL);
- else
- skb_get(oob_skb);
spin_unlock(&sk->sk_receive_queue.lock);
unix_state_unlock(sk);
@@ -2651,8 +2642,6 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
if (!(state->flags & MSG_PEEK))
UNIXCB(oob_skb).consumed += 1;
- consume_skb(oob_skb);
-
mutex_unlock(&u->iolock);
if (chunk < 0)
@@ -2665,56 +2654,52 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
int flags, int copied)
{
+ struct sk_buff *read_skb = NULL, *unread_skb = NULL;
struct unix_sock *u = unix_sk(sk);
- if (!unix_skb_len(skb)) {
- struct sk_buff *unlinked_skb = NULL;
+ if (likely(unix_skb_len(skb) && skb != READ_ONCE(u->oob_skb)))
+ return skb;
- spin_lock(&sk->sk_receive_queue.lock);
+ spin_lock(&sk->sk_receive_queue.lock);
+ if (!unix_skb_len(skb)) {
if (copied && (!u->oob_skb || skb == u->oob_skb)) {
skb = NULL;
} else if (flags & MSG_PEEK) {
skb = skb_peek_next(skb, &sk->sk_receive_queue);
} else {
- unlinked_skb = skb;
+ read_skb = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- __skb_unlink(unlinked_skb, &sk->sk_receive_queue);
+ __skb_unlink(read_skb, &sk->sk_receive_queue);
}
- spin_unlock(&sk->sk_receive_queue.lock);
+ if (!skb)
+ goto unlock;
+ }
- consume_skb(unlinked_skb);
- } else {
- struct sk_buff *unlinked_skb = NULL;
+ if (skb != u->oob_skb)
+ goto unlock;
- spin_lock(&sk->sk_receive_queue.lock);
+ if (copied) {
+ skb = NULL;
+ } else if (!(flags & MSG_PEEK)) {
+ WRITE_ONCE(u->oob_skb, NULL);
- if (skb == u->oob_skb) {
- if (copied) {
- skb = NULL;
- } else if (!(flags & MSG_PEEK)) {
- if (sock_flag(sk, SOCK_URGINLINE)) {
- WRITE_ONCE(u->oob_skb, NULL);
- consume_skb(skb);
- } else {
- __skb_unlink(skb, &sk->sk_receive_queue);
- WRITE_ONCE(u->oob_skb, NULL);
- unlinked_skb = skb;
- skb = skb_peek(&sk->sk_receive_queue);
- }
- } else if (!sock_flag(sk, SOCK_URGINLINE)) {
- skb = skb_peek_next(skb, &sk->sk_receive_queue);
- }
+ if (!sock_flag(sk, SOCK_URGINLINE)) {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ unread_skb = skb;
+ skb = skb_peek(&sk->sk_receive_queue);
}
+ } else if (!sock_flag(sk, SOCK_URGINLINE)) {
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ }
- spin_unlock(&sk->sk_receive_queue.lock);
+unlock:
+ spin_unlock(&sk->sk_receive_queue.lock);
+
+ consume_skb(read_skb);
+ kfree_skb(unread_skb);
- if (unlinked_skb) {
- WARN_ON_ONCE(skb_unref(unlinked_skb));
- kfree_skb(unlinked_skb);
- }
- }
return skb;
}
#endif
@@ -2756,7 +2741,6 @@ static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
unix_state_unlock(sk);
if (drop) {
- WARN_ON_ONCE(skb_unref(skb));
kfree_skb(skb);
return -EAGAIN;
}
@@ -3192,9 +3176,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
skb = skb_peek(&sk->sk_receive_queue);
if (skb) {
struct sk_buff *oob_skb = READ_ONCE(u->oob_skb);
+ struct sk_buff *next_skb;
+
+ next_skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb == oob_skb ||
- (!oob_skb && !unix_skb_len(skb)))
+ (!unix_skb_len(skb) &&
+ (!oob_skb || next_skb == oob_skb)))
answ = 1;
}
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 06d94ad999e9..0068e758be4d 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -337,18 +337,6 @@ static bool unix_vertex_dead(struct unix_vertex *vertex)
return true;
}
-static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
-{
- skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
-
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- WARN_ON_ONCE(skb_unref(u->oob_skb));
- u->oob_skb = NULL;
- }
-#endif
-}
-
static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
{
struct unix_vertex *vertex;
@@ -371,11 +359,11 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist
struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
spin_lock(&embryo_queue->lock);
- unix_collect_queue(unix_sk(skb->sk), hitlist);
+ skb_queue_splice_init(embryo_queue, hitlist);
spin_unlock(&embryo_queue->lock);
}
} else {
- unix_collect_queue(u, hitlist);
+ skb_queue_splice_init(queue, hitlist);
}
spin_unlock(&queue->lock);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 0ff9b2dd86ba..35681adedd9a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -112,6 +112,7 @@
#include <net/sock.h>
#include <net/af_vsock.h>
#include <uapi/linux/vm_sockets.h>
+#include <uapi/asm-generic/ioctls.h>
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
@@ -1295,6 +1296,57 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
+static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
+ int __user *arg)
+{
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk;
+ int ret;
+
+ vsk = vsock_sk(sk);
+
+ switch (cmd) {
+ case SIOCOUTQ: {
+ ssize_t n_bytes;
+
+ if (!vsk->transport || !vsk->transport->unsent_bytes) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
+ ret = -EINVAL;
+ break;
+ }
+
+ n_bytes = vsk->transport->unsent_bytes(vsk);
+ if (n_bytes < 0) {
+ ret = n_bytes;
+ break;
+ }
+
+ ret = put_user(n_bytes, arg);
+ break;
+ }
+ default:
+ ret = -ENOIOCTLCMD;
+ }
+
+ return ret;
+}
+
+static int vsock_ioctl(struct socket *sock, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret;
+
+ lock_sock(sock->sk);
+ ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
+ release_sock(sock->sk);
+
+ return ret;
+}
+
static const struct proto_ops vsock_dgram_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -1305,7 +1357,7 @@ static const struct proto_ops vsock_dgram_ops = {
.accept = sock_no_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
.sendmsg = vsock_dgram_sendmsg,
@@ -2294,7 +2346,7 @@ static const struct proto_ops vsock_stream_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
@@ -2316,7 +2368,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
.accept = vsock_accept,
.getname = vsock_getname,
.poll = vsock_poll,
- .ioctl = sock_no_ioctl,
+ .ioctl = vsock_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
.setsockopt = vsock_connectible_setsockopt,
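
The new handler makes SIOCOUTQ usable on any vsock socket whose transport implements unsent_bytes(). A userspace sketch, assuming a reachable stream service on the host (the port is hypothetical):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <linux/sockios.h>	/* SIOCOUTQ */
	#include <linux/vm_sockets.h>

	int main(void)
	{
		struct sockaddr_vm addr = {
			.svm_family = AF_VSOCK,
			.svm_cid = VMADDR_CID_HOST,
			.svm_port = 1234,	/* hypothetical service port */
		};
		int unsent, fd = socket(AF_VSOCK, SOCK_STREAM, 0);

		if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)))
			return 1;
		write(fd, "ping", 4);
		if (ioctl(fd, SIOCOUTQ, &unsent) == 0)	/* fails if the transport lacks unsent_bytes() */
			printf("unsent bytes: %d\n", unsent);
		close(fd);
		return 0;
	}
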
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 64a07acfef12..e0160da4ef43 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -311,7 +311,7 @@ static void virtio_transport_tx_work(struct work_struct *work)
virtqueue_disable_cb(vq);
while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
- consume_skb(skb);
+ virtio_transport_consume_skb_sent(skb, true);
added = true;
}
} while (!virtqueue_enable_cb(vq));
@@ -540,6 +540,8 @@ static struct virtio_transport virtio_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+ .unsent_bytes = virtio_transport_unsent_bytes,
+
.read_skb = virtio_transport_read_skb,
},
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 16ff976a86e3..884ee128851e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -463,6 +463,26 @@ void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
+void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
+{
+ struct sock *s = skb->sk;
+
+ if (s && skb->len) {
+ struct vsock_sock *vs = vsock_sk(s);
+ struct virtio_vsock_sock *vvs;
+
+ vvs = vs->trans;
+
+ spin_lock_bh(&vvs->tx_lock);
+ vvs->bytes_unsent -= skb->len;
+ spin_unlock_bh(&vvs->tx_lock);
+ }
+
+ if (consume)
+ consume_skb(skb);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);
+
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
u32 ret;
@@ -475,6 +495,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
if (ret > credit)
ret = credit;
vvs->tx_cnt += ret;
+ vvs->bytes_unsent += ret;
spin_unlock_bh(&vvs->tx_lock);
return ret;
@@ -488,6 +509,7 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
spin_lock_bh(&vvs->tx_lock);
vvs->tx_cnt -= credit;
+ vvs->bytes_unsent -= credit;
spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
@@ -1090,6 +1112,19 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
+ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ size_t ret;
+
+ spin_lock_bh(&vvs->tx_lock);
+ ret = vvs->bytes_unsent;
+ spin_unlock_bh(&vvs->tx_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
+
static int virtio_transport_reset(struct vsock_sock *vsk,
struct sk_buff *skb)
{
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 6dea6119f5b2..6e78927a598e 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -98,6 +98,8 @@ static struct virtio_transport loopback_transport = {
.notify_buffer_size = virtio_transport_notify_buffer_size,
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
+ .unsent_bytes = virtio_transport_unsent_bytes,
+
.read_skb = virtio_transport_read_skb,
},
@@ -123,6 +125,10 @@ static void vsock_loopback_work(struct work_struct *work)
spin_unlock_bh(&vsock->pkt_queue.lock);
while ((skb = __skb_dequeue(&pkts))) {
+ /* Decrement the bytes_unsent counter without deallocating the skb;
+ * it is freed by the receiver.
+ */
+ virtio_transport_consume_skb_sent(skb, false);
virtio_transport_deliver_tap_pkt(skb);
virtio_transport_recv_pkt(&loopback_transport, skb);
}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4d5d351bd0b5..661adfc77644 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -165,11 +165,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (!wdev->netdev)
continue;
- wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = false;
err = dev_change_net_namespace(wdev->netdev, net, "wlan%d");
if (err)
break;
- wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = true;
}
if (err) {
@@ -181,11 +181,11 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
list) {
if (!wdev->netdev)
continue;
- wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = false;
err = dev_change_net_namespace(wdev->netdev, net,
"wlan%d");
WARN_ON(err);
- wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
+ wdev->netdev->netns_local = true;
}
return err;
@@ -1473,7 +1473,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
SET_NETDEV_DEVTYPE(dev, &wiphy_type);
wdev->netdev = dev;
/* can only change netns with wiphy */
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
cfg80211_init_wdev(wdev);
break;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 41c8c0e3ba2e..3b3e3cd7027a 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -170,6 +170,12 @@ static inline int for_each_rdev_check_rtnl(void)
if (for_each_rdev_check_rtnl()) {} else \
list_for_each_entry(rdev, &cfg80211_rdev_list, list)
+enum bss_source_type {
+ BSS_SOURCE_DIRECT = 0,
+ BSS_SOURCE_MBSSID,
+ BSS_SOURCE_STA_PROFILE,
+};
+
struct cfg80211_internal_bss {
struct list_head list;
struct list_head hidden_list;
@@ -191,6 +197,8 @@ struct cfg80211_internal_bss {
*/
u8 parent_bssid[ETH_ALEN] __aligned(2);
+ enum bss_source_type bss_source;
+
/* must be last because of priv member */
struct cfg80211_bss pub;
};
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 34e5acff3935..1e3ed29f7cfc 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -94,7 +94,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
lockdep_assert_held(&rdev->wiphy.mtx);
- if (wdev->cac_started)
+ if (wdev->links[0].cac_started)
return -EBUSY;
if (wdev->u.ibss.ssid_len)
diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c
index d66a913027e0..64c447040786 100644
--- a/net/wireless/lib80211.c
+++ b/net/wireless/lib80211.c
@@ -34,7 +34,7 @@ MODULE_LICENSE("GPL");
struct lib80211_crypto_alg {
struct list_head list;
- struct lib80211_crypto_ops *ops;
+ const struct lib80211_crypto_ops *ops;
};
static LIST_HEAD(lib80211_crypto_algs);
@@ -161,7 +161,7 @@ void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info,
}
EXPORT_SYMBOL(lib80211_crypt_delayed_deinit);
-int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops)
+int lib80211_register_crypto_ops(const struct lib80211_crypto_ops *ops)
{
unsigned long flags;
struct lib80211_crypto_alg *alg;
@@ -183,7 +183,7 @@ int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops)
}
EXPORT_SYMBOL(lib80211_register_crypto_ops);
-int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
+int lib80211_unregister_crypto_ops(const struct lib80211_crypto_ops *ops)
{
struct lib80211_crypto_alg *alg;
unsigned long flags;
@@ -206,7 +206,7 @@ int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops)
}
EXPORT_SYMBOL(lib80211_unregister_crypto_ops);
-struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name)
+const struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name)
{
struct lib80211_crypto_alg *alg;
unsigned long flags;
@@ -234,7 +234,7 @@ static void lib80211_crypt_null_deinit(void *priv)
{
}
-static struct lib80211_crypto_ops lib80211_crypt_null = {
+static const struct lib80211_crypto_ops lib80211_crypt_null = {
.name = "NULL",
.init = lib80211_crypt_null_init,
.deinit = lib80211_crypt_null_deinit,
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index cca5e1cf089e..5aad139130e1 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -418,7 +418,7 @@ static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv)
ccmp->dot11RSNAStatsCCMPDecryptErrors);
}
-static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
+static const struct lib80211_crypto_ops lib80211_crypt_ccmp = {
.name = "CCMP",
.init = lib80211_ccmp_init,
.deinit = lib80211_ccmp_deinit,
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 5c8cdf7681e3..63e68e5e121e 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -705,7 +705,7 @@ static void lib80211_tkip_print_stats(struct seq_file *m, void *priv)
tkip->dot11RSNAStatsTKIPLocalMICFailures);
}
-static struct lib80211_crypto_ops lib80211_crypt_tkip = {
+static const struct lib80211_crypto_ops lib80211_crypt_tkip = {
.name = "TKIP",
.init = lib80211_tkip_init,
.deinit = lib80211_tkip_deinit,
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index 6ab9957b8f96..3b148c7bef85 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -226,7 +226,7 @@ static void lib80211_wep_print_stats(struct seq_file *m, void *priv)
seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
}
-static struct lib80211_crypto_ops lib80211_crypt_wep = {
+static const struct lib80211_crypto_ops lib80211_crypt_wep = {
.name = "WEP",
.init = lib80211_wep_init,
.deinit = lib80211_wep_deinit,
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index aaca65b66af4..2c6654075ca9 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -127,7 +127,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
if (!rdev->ops->join_mesh)
return -EOPNOTSUPP;
- if (wdev->cac_started)
+ if (wdev->links[0].cac_started)
return -EBUSY;
if (!setup->chandef.chan) {
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 4052041a19ea..4dac81854721 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1110,26 +1110,28 @@ EXPORT_SYMBOL(__cfg80211_radar_event);
void cfg80211_cac_event(struct net_device *netdev,
const struct cfg80211_chan_def *chandef,
- enum nl80211_radar_event event, gfp_t gfp)
+ enum nl80211_radar_event event, gfp_t gfp,
+ unsigned int link_id)
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
unsigned long timeout;
- /* not yet supported */
- if (wdev->valid_links)
+ if (WARN_ON(wdev->valid_links &&
+ !(wdev->valid_links & BIT(link_id))))
return;
- trace_cfg80211_cac_event(netdev, event);
+ trace_cfg80211_cac_event(netdev, event, link_id);
- if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED))
+ if (WARN_ON(!wdev->links[link_id].cac_started &&
+ event != NL80211_RADAR_CAC_STARTED))
return;
switch (event) {
case NL80211_RADAR_CAC_FINISHED:
- timeout = wdev->cac_start_time +
- msecs_to_jiffies(wdev->cac_time_ms);
+ timeout = wdev->links[link_id].cac_start_time +
+ msecs_to_jiffies(wdev->links[link_id].cac_time_ms);
WARN_ON(!time_after_eq(jiffies, timeout));
cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE);
memcpy(&rdev->cac_done_chandef, chandef,
@@ -1138,10 +1140,10 @@ void cfg80211_cac_event(struct net_device *netdev,
cfg80211_sched_dfs_chan_update(rdev);
fallthrough;
case NL80211_RADAR_CAC_ABORTED:
- wdev->cac_started = false;
+ wdev->links[link_id].cac_started = false;
break;
case NL80211_RADAR_CAC_STARTED:
- wdev->cac_started = true;
+ wdev->links[link_id].cac_started = true;
break;
default:
WARN_ON(1);
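
Channel Availability Check state thus becomes per-link for MLO: cac_started, cac_start_time and cac_time_ms now live in wdev->links[link_id], and drivers report events through the extended signature, as taken from the hunk above:

	void cfg80211_cac_event(struct net_device *netdev,
				const struct cfg80211_chan_def *chandef,
				enum nl80211_radar_event event, gfp_t gfp,
				unsigned int link_id);
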
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7397a372c78e..9ab777e0bd4d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6066,7 +6066,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->start_ap)
return -EOPNOTSUPP;
- if (wdev->cac_started)
+ if (wdev->links[link_id].cac_started)
return -EBUSY;
if (wdev->links[link_id].ap.beacon_interval)
@@ -9778,7 +9778,8 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
return ERR_PTR(-ENOMEM);
if (n_ssids)
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request +
+ struct_size(request, channels, n_channels);
request->n_ssids = n_ssids;
if (ie_len) {
if (n_ssids)
@@ -10072,6 +10073,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int link_id = nl80211_link_id(info->attrs);
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_chan_def chandef;
enum nl80211_dfs_regions dfs_region;
@@ -10121,7 +10123,20 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
goto unlock;
}
- if (cfg80211_beaconing_iface_active(wdev) || wdev->cac_started) {
+ if (cfg80211_beaconing_iface_active(wdev)) {
+ /* During MLO, other link(s) may already be beaconing; only the
+ * current link must not already be beaconing.
+ */
+ if (wdev->valid_links &&
+ !wdev->links[link_id].ap.beacon_interval) {
+ /* nothing */
+ } else {
+ err = -EBUSY;
+ goto unlock;
+ }
+ }
+
+ if (wdev->links[link_id].cac_started) {
err = -EBUSY;
goto unlock;
}
@@ -10141,12 +10156,26 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
if (WARN_ON(!cac_time_ms))
cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS;
- err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms);
+ err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms,
+ link_id);
if (!err) {
- wdev->links[0].ap.chandef = chandef;
- wdev->cac_started = true;
- wdev->cac_start_time = jiffies;
- wdev->cac_time_ms = cac_time_ms;
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
+ wdev->links[0].ap.chandef = chandef;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ wdev->u.ibss.chandef = chandef;
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ wdev->u.mesh.chandef = chandef;
+ break;
+ default:
+ break;
+ }
+ wdev->links[link_id].cac_started = true;
+ wdev->links[link_id].cac_start_time = jiffies;
+ wdev->links[link_id].cac_time_ms = cac_time_ms;
}
unlock:
wiphy_unlock(wiphy);
@@ -10494,17 +10523,21 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
NL80211_BSS_CHAIN_SIGNAL))
goto nla_put_failure;
- switch (rdev->wiphy.signal_type) {
- case CFG80211_SIGNAL_TYPE_MBM:
- if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal))
- goto nla_put_failure;
- break;
- case CFG80211_SIGNAL_TYPE_UNSPEC:
- if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal))
- goto nla_put_failure;
- break;
- default:
- break;
+ if (intbss->bss_source != BSS_SOURCE_STA_PROFILE) {
+ switch (rdev->wiphy.signal_type) {
+ case CFG80211_SIGNAL_TYPE_MBM:
+ if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM,
+ res->signal))
+ goto nla_put_failure;
+ break;
+ case CFG80211_SIGNAL_TYPE_UNSPEC:
+ if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC,
+ res->signal))
+ goto nla_put_failure;
+ break;
+ default:
+ break;
+ }
}
switch (wdev->iftype) {
@@ -16498,10 +16531,10 @@ nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info)
SELECTOR(__sel, NETDEV_UP_NOTMX, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_NO_WIPHY_MTX) \
- SELECTOR(__sel, NETDEV_UP_NOTMX_NOMLO, \
+ SELECTOR(__sel, NETDEV_UP_NOTMX_MLO, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_NO_WIPHY_MTX | \
- NL80211_FLAG_MLO_UNSUPPORTED) \
+ NL80211_FLAG_MLO_VALID_LINK_ID) \
SELECTOR(__sel, NETDEV_UP_CLEAR, \
NL80211_FLAG_NEED_NETDEV_UP | \
NL80211_FLAG_CLEAR_SKB) \
@@ -17396,7 +17429,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_NO_WIPHY_MTX |
- NL80211_FLAG_MLO_UNSUPPORTED),
+ NL80211_FLAG_MLO_VALID_LINK_ID),
},
{
.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index ec3f4aa1c807..f5adbf6b5c84 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1200,26 +1200,27 @@ static inline int
rdev_start_radar_detection(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms)
+ u32 cac_time_ms, int link_id)
{
int ret = -EOPNOTSUPP;
trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef,
- cac_time_ms);
+ cac_time_ms, link_id);
if (rdev->ops->start_radar_detection)
ret = rdev->ops->start_radar_detection(&rdev->wiphy, dev,
- chandef, cac_time_ms);
+ chandef, cac_time_ms,
+ link_id);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
static inline void
rdev_end_cac(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev, unsigned int link_id)
{
- trace_rdev_end_cac(&rdev->wiphy, dev);
+ trace_rdev_end_cac(&rdev->wiphy, dev, link_id);
if (rdev->ops->end_cac)
- rdev->ops->end_cac(&rdev->wiphy, dev);
+ rdev->ops->end_cac(&rdev->wiphy, dev, link_id);
trace_rdev_return_void(&rdev->wiphy);
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4a27f3823e25..6489ba943a63 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -4229,6 +4229,8 @@ EXPORT_SYMBOL(regulatory_pre_cac_allowed);
static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
{
struct wireless_dev *wdev;
+ unsigned int link_id;
+
/* If we finished CAC or received radar, we should end any
* CAC running on the same channels.
* the check !cfg80211_chandef_dfs_usable contains 2 options:
@@ -4241,16 +4243,17 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev)
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
struct cfg80211_chan_def *chandef;
- if (!wdev->cac_started)
- continue;
+ for_each_valid_link(wdev, link_id) {
+ if (!wdev->links[link_id].cac_started)
+ continue;
- /* FIXME: radar detection is tied to link 0 for now */
- chandef = wdev_chandef(wdev, 0);
- if (!chandef)
- continue;
+ chandef = wdev_chandef(wdev, link_id);
+ if (!chandef)
+ continue;
- if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef))
- rdev_end_cac(rdev, wdev->netdev);
+ if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef))
+ rdev_end_cac(rdev, wdev->netdev, link_id);
+ }
}
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 64eeed82d43d..59a90bf3c0d6 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1910,6 +1910,7 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
known->pub.bssid_index = new->pub.bssid_index;
known->pub.use_for &= new->pub.use_for;
known->pub.cannot_use_reasons = new->pub.cannot_use_reasons;
+ known->bss_source = new->bss_source;
return true;
}
@@ -2008,10 +2009,10 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
return found;
free_ies:
- ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
+ ies = (void *)rcu_access_pointer(tmp->pub.beacon_ies);
if (ies)
kfree_rcu(ies, rcu_head);
- ies = (void *)rcu_dereference(tmp->pub.proberesp_ies);
+ ies = (void *)rcu_access_pointer(tmp->pub.proberesp_ies);
if (ies)
kfree_rcu(ies, rcu_head);
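
The free_ies path above switches from rcu_dereference() to rcu_access_pointer(): it only needs the pointer value to hand to kfree_rcu() and never dereferences the contents, so no RCU read-side critical section is required. A kernel-context sketch of the distinction, with invented types rather than the cfg80211 ones:

/* Kernel-context sketch (types invented): the teardown path only needs
 * the pointer value, so rcu_access_pointer() suffices and, unlike
 * rcu_dereference(), is legal outside rcu_read_lock(). */
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_ies {
	struct rcu_head rcu_head;
	int len;
};

struct my_bss {
	struct my_ies __rcu *beacon_ies;
};

static void my_bss_free_ies(struct my_bss *bss)
{
	struct my_ies *ies;

	/* Pointer value only; the contents are never dereferenced. */
	ies = (void *)rcu_access_pointer(bss->beacon_ies);
	if (ies)
		kfree_rcu(ies, rcu_head);
}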
@@ -2149,11 +2150,7 @@ struct cfg80211_inform_single_bss_data {
const u8 *ie;
size_t ielen;
- enum {
- BSS_SOURCE_DIRECT = 0,
- BSS_SOURCE_MBSSID,
- BSS_SOURCE_STA_PROFILE,
- } bss_source;
+ enum bss_source_type bss_source;
/* Set if reporting bss_source != BSS_SOURCE_DIRECT */
struct cfg80211_bss *source_bss;
u8 max_bssid_indicator;
@@ -2268,6 +2265,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
IEEE80211_MAX_CHAINS);
tmp.pub.use_for = data->use_for;
tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
+ tmp.bss_source = data->bss_source;
switch (data->bss_source) {
case BSS_SOURCE_MBSSID:
@@ -2907,6 +2905,9 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
struct element *reporter_rnr = NULL;
struct ieee80211_multi_link_elem *ml_elem;
struct cfg80211_mle *mle;
+ const struct element *ssid_elem;
+ const u8 *ssid = NULL;
+ size_t ssid_len = 0;
u16 control;
u8 ml_common_len;
u8 *new_ie = NULL;
@@ -2961,6 +2962,13 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
bss_change_count,
gfp);
+ ssid_elem = cfg80211_find_elem(WLAN_EID_SSID, tx_data->ie,
+ tx_data->ielen);
+ if (ssid_elem) {
+ ssid = ssid_elem->data;
+ ssid_len = ssid_elem->datalen;
+ }
+
for (i = 0; i < ARRAY_SIZE(mle->sta_prof) && mle->sta_prof[i]; i++) {
const struct ieee80211_neighbor_ap_info *ap_info;
enum nl80211_band band;
@@ -3042,6 +3050,23 @@ cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
freq = ieee80211_channel_to_freq_khz(ap_info->channel, band);
data.channel = ieee80211_get_channel_khz(wiphy, freq);
+ /* Skip if a BSS entry generated from an MBSSID or DIRECT source
+ * frame is already available.
+ */
+ bss = cfg80211_get_bss(wiphy, data.channel, data.bssid, ssid,
+ ssid_len, IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ if (bss) {
+ struct cfg80211_internal_bss *ibss = bss_from_pub(bss);
+
+ if (data.capability == bss->capability &&
+ ibss->bss_source != BSS_SOURCE_STA_PROFILE) {
+ cfg80211_put_bss(wiphy, bss);
+ continue;
+ }
+ cfg80211_put_bss(wiphy, bss);
+ }
+
if (use_for == NL80211_BSS_USE_FOR_MLD_LINK &&
!(wiphy->flags & WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY)) {
use_for = 0;
@@ -3467,8 +3492,8 @@ int cfg80211_wext_siwscan(struct net_device *dev,
n_channels = ieee80211_get_num_supported_channels(wiphy);
}
- creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
- n_channels * sizeof(void *),
+ creq = kzalloc(struct_size(creq, channels, n_channels) +
+ sizeof(struct cfg80211_ssid),
GFP_ATOMIC);
if (!creq)
return -ENOMEM;
@@ -3476,7 +3501,7 @@ int cfg80211_wext_siwscan(struct net_device *dev,
creq->wiphy = wiphy;
creq->wdev = dev->ieee80211_ptr;
/* SSIDs come after channels */
- creq->ssids = (void *)&creq->channels[n_channels];
+ creq->ssids = (void *)creq + struct_size(creq, channels, n_channels);
creq->n_channels = n_channels;
creq->n_ssids = 1;
creq->scan_start = jiffies;
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d9d7bf8bb5c1..431da30817a6 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -115,7 +115,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev)
n_channels = i;
}
request->n_channels = n_channels;
- request->ssids = (void *)&request->channels[n_channels];
+ request->ssids = (void *)request +
+ struct_size(request, channels, n_channels);
request->n_ssids = 1;
memcpy(request->ssids[0].ssid, wdev->conn->params.ssid,
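
The nl80211.c, scan.c, and sme.c hunks above all locate the SSID array that is co-allocated after the flexible channels[] array by offsetting from the request base with struct_size(), instead of indexing one past the flexible array member. A standalone userspace sketch of the layout trick, with a simplified struct_size() that omits the overflow checking the kernel macro performs:

/* Sketch: one allocation holding a flexible array plus trailing SSID
 * storage; the struct here is hypothetical, not the cfg80211 one. */
#include <stdlib.h>

struct ssid_entry { char ssid[32]; };

struct scan_req {
	struct ssid_entry *ssids;   /* points into the same allocation */
	int n_channels;
	void *channels[];           /* flexible array member */
};

#define struct_size(p, member, n) \
	(sizeof(*(p)) + (n) * sizeof((p)->member[0]))

static struct scan_req *scan_req_alloc(int n_channels)
{
	struct scan_req *req;

	req = calloc(1, struct_size(req, channels, n_channels) +
			sizeof(struct ssid_entry));
	if (!req)
		return NULL;

	/* SSIDs come after channels: offset from the base pointer
	 * rather than via &req->channels[n_channels], which indexes
	 * past the flexible array member. */
	req->ssids = (void *)req + struct_size(req, channels, n_channels);
	req->n_channels = n_channels;
	return req;
}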
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 5c26f065bd68..97c21b627791 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -805,9 +805,22 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa,
TP_ARGS(wiphy, netdev)
);
-DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev),
- TP_ARGS(wiphy, netdev)
+TRACE_EVENT(rdev_end_cac,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ unsigned int link_id),
+ TP_ARGS(wiphy, netdev, link_id),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(unsigned int, link_id)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->link_id = link_id;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id)
);
DECLARE_EVENT_CLASS(station_add_change,
@@ -2652,24 +2665,26 @@ TRACE_EVENT(rdev_external_auth,
TRACE_EVENT(rdev_start_radar_detection,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- u32 cac_time_ms),
- TP_ARGS(wiphy, netdev, chandef, cac_time_ms),
+ u32 cac_time_ms, int link_id),
+ TP_ARGS(wiphy, netdev, chandef, cac_time_ms, link_id),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(u32, cac_time_ms)
+ __field(int, link_id)
),
TP_fast_assign(
WIPHY_ASSIGN;
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->cac_time_ms = cac_time_ms;
+ __entry->link_id = link_id;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
- ", cac_time_ms=%u",
+ ", cac_time_ms=%u, link_id=%d",
WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
- __entry->cac_time_ms)
+ __entry->cac_time_ms, __entry->link_id)
);
TRACE_EVENT(rdev_set_mcast_rate,
@@ -3483,18 +3498,21 @@ TRACE_EVENT(cfg80211_radar_event,
);
TRACE_EVENT(cfg80211_cac_event,
- TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt),
- TP_ARGS(netdev, evt),
+ TP_PROTO(struct net_device *netdev, enum nl80211_radar_event evt,
+ unsigned int link_id),
+ TP_ARGS(netdev, evt, link_id),
TP_STRUCT__entry(
NETDEV_ENTRY
__field(enum nl80211_radar_event, evt)
+ __field(unsigned int, link_id)
),
TP_fast_assign(
NETDEV_ASSIGN;
__entry->evt = evt;
+ __entry->link_id = link_id;
),
- TP_printk(NETDEV_PR_FMT ", event: %d",
- NETDEV_PR_ARG, __entry->evt)
+ TP_printk(NETDEV_PR_FMT ", event: %d, link_id=%u",
+ NETDEV_PR_ARG, __entry->evt, __entry->link_id)
);
DECLARE_EVENT_CLASS(cfg80211_rx_evt,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 9a7c3adc8a3b..f49b55724f83 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -998,10 +998,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
* Diffserv Service Classes no update is needed:
* - Standard: DF
* - Low Priority Data: CS1
- * - Multimedia Streaming: AF31, AF32, AF33
* - Multimedia Conferencing: AF41, AF42, AF43
* - Network Control Traffic: CS7
* - Real-Time Interactive: CS4
+ * - Signaling: CS5
*/
switch (dscp >> 2) {
case 10:
@@ -1026,9 +1026,11 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
/* Broadcasting video: CS3 */
ret = 4;
break;
- case 40:
- /* Signaling: CS5 */
- ret = 5;
+ case 26:
+ case 28:
+ case 30:
+ /* Multimedia Streaming: AF31, AF32, AF33 */
+ ret = 4;
break;
case 44:
/* Voice Admit: VA */
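
The classify8021d change above explicitly maps AF31/AF32/AF33 (6-bit DSCP 26/28/30, Multimedia Streaming) to user priority 4, and drops the explicit CS5 case because the function's default fallback, which keeps only the three precedence bits, already yields 5 for DSCP 40. A quick arithmetic check, assuming that fallback behavior:

/* Sketch: the switch above operates on the 6-bit DSCP (the 8-bit DS
 * field shifted right by two); the default fallback keeps only the
 * three precedence bits. */
#include <assert.h>

int main(void)
{
	unsigned int cs5 = 40;     /* Signaling: CS5, 0b101000 */
	unsigned int af31 = 26;    /* Multimedia Streaming: AF31 */

	assert((cs5 >> 3) == 5);   /* fallback already yields UP 5 */
	assert((af31 >> 3) == 3);  /* fallback would yield UP 3 ... */
	/* ... hence the explicit cases 26/28/30 mapping to UP 4 */
	return 0;
}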
@@ -2435,8 +2437,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
if (params->num_different_channels > c->num_different_channels)
continue;
- limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
- GFP_KERNEL);
+ limits = kmemdup_array(c->limits, c->n_limits, sizeof(*limits),
+ GFP_KERNEL);
if (!limits)
return -ENOMEM;
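
kmemdup_array() replaces the open-coded kmemdup(src, sizeof(elem) * n, ...) so the element-count multiplication is overflow-checked; in the kernel the helper sits on top of kmemdup() with a checked size multiplication. A userspace sketch of the equivalent semantics:

/* Sketch mirroring kmemdup_array() semantics in userspace: refuse
 * duplication when count * size would overflow, otherwise copy. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static void *kmemdup_array_sketch(const void *src, size_t count,
				  size_t size)
{
	void *dst;

	if (size && count > SIZE_MAX / size)
		return NULL;    /* multiplication would overflow */

	dst = malloc(count * size);
	if (dst)
		memcpy(dst, src, count * size);
	return dst;
}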
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index c0e0204b9630..521a2938e50a 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -211,6 +211,11 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
goto err_unreg_pool;
}
+ if (dev_get_min_mp_channel_count(netdev)) {
+ err = -EBUSY;
+ goto err_unreg_pool;
+ }
+
bpf.command = XDP_SETUP_XSK_POOL;
bpf.xsk.pool = pool;
bpf.xsk.queue_id = queue_id;
@@ -623,20 +628,31 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
return nb_entries;
}
-u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+static u32 xp_alloc_slow(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ u32 max)
{
- u32 nb_entries1 = 0, nb_entries2;
+ int i;
- if (unlikely(pool->dev && dma_dev_need_sync(pool->dev))) {
+ for (i = 0; i < max; i++) {
struct xdp_buff *buff;
- /* Slow path */
buff = xp_alloc(pool);
- if (buff)
- *xdp = buff;
- return !!buff;
+ if (unlikely(!buff))
+ return i;
+ *xdp = buff;
+ xdp++;
}
+ return max;
+}
+
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ u32 nb_entries1 = 0, nb_entries2;
+
+ if (unlikely(pool->dev && dma_dev_need_sync(pool->dev)))
+ return xp_alloc_slow(pool, xdp, max);
+
if (unlikely(pool->free_list_cnt)) {
nb_entries1 = xp_alloc_reused(pool, xdp, max);
if (nb_entries1 == max)
@@ -656,9 +672,17 @@ EXPORT_SYMBOL(xp_alloc_batch);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
+ u32 req_count, avail_count;
+
if (pool->free_list_cnt >= count)
return true;
- return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
+
+ req_count = count - pool->free_list_cnt;
+ avail_count = xskq_cons_nb_entries(pool->fq, req_count);
+ if (!avail_count)
+ pool->fq->queue_empty_descs++;
+
+ return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);
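
xp_alloc_batch() previously limited the DMA-sync slow path to a single buffer per call; the new xp_alloc_slow() keeps the per-buffer path but still tries to fill the whole batch, returning however many buffers it obtained. A runnable toy model of that batch-with-fallback shape; the pool below is invented, and the real fast path pulls contiguous runs off the fill ring:

/* Toy model of xp_alloc_batch()'s slow-path handling: fill as much
 * of the batch as possible one buffer at a time. */
#include <stdio.h>

struct pool {
	int remaining;          /* buffers left in this toy pool */
	int slow_only;          /* stands in for dma_dev_need_sync() */
};

static int alloc_one(struct pool *p)
{
	return p->remaining > 0 ? p->remaining-- : 0;
}

static unsigned int alloc_slow(struct pool *p, int *out, unsigned int max)
{
	unsigned int i;

	for (i = 0; i < max; i++) {
		int b = alloc_one(p);

		if (!b)
			return i;       /* partial batch: caller copes */
		out[i] = b;
	}
	return max;
}

static unsigned int alloc_batch(struct pool *p, int *out, unsigned int max)
{
	if (p->slow_only)
		return alloc_slow(p, out, max);
	/* fast path elided: would pull contiguous runs off the ring */
	return alloc_slow(p, out, max);
}

int main(void)
{
	struct pool p = { .remaining = 3, .slow_only = 1 };
	int bufs[8];

	printf("allocated %u of 8\n", alloc_batch(&p, bufs, 8)); /* 3 */
	return 0;
}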
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6f2d1621c992..406b20dfee8d 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -306,11 +306,6 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
return entries >= max ? max : entries;
}
-static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
-{
- return xskq_cons_nb_entries(q, cnt) >= cnt;
-}
-
static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
if (q->cached_prod == q->cached_cons)
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 9a44d363ba62..f123b7c9ec82 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -328,12 +328,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
/* User explicitly requested packet offload mode and configured
* policy in addition to the XFRM state. So be civil to users,
* and return an error instead of taking the fallback path.
- *
- * This WARN_ON() can be seen as a documentation for driver
- * authors to do not return -EOPNOTSUPP in packet offload mode.
*/
- WARN_ON(err == -EOPNOTSUPP && is_packet_offload);
- if (err != -EOPNOTSUPP || is_packet_offload) {
+ if ((err != -EOPNOTSUPP && !is_packet_offload) || is_packet_offload) {
NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state");
return err;
}
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index e50e4bf993fa..98f1e2b67c76 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -769,7 +769,7 @@ static int xfrmi_dev_init(struct net_device *dev)
if (err)
return err;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->features |= XFRMI_FEATURES;
dev->hw_features |= XFRMI_FEATURES;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c56c61b0c12e..914bac03b52a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -45,6 +45,7 @@
#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif
+#include <net/inet_dscp.h>
#include "xfrm_hash.h"
@@ -109,7 +110,11 @@ struct xfrm_pol_inexact_node {
* 4. saddr:any list from saddr tree
*
* This result set then needs to be searched for the policy with
- * the lowest priority. If two results have same prio, youngest one wins.
+ * the lowest priority. If two candidates have the same priority, the
+ * struct xfrm_policy pos member with the lower number is used.
+ *
+ * This replicates the previous single-list-search algorithm, which
+ * would return the first matching policy in the (ordered-by-priority)
+ * list.
*/
struct xfrm_pol_inexact_key {
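
The updated comment above describes the selection rule for the inexact candidate set: the lowest priority wins, and on a priority tie the policy with the lower pos (the one inserted earlier) is chosen, reproducing what a single priority-ordered list search would have returned. A minimal sketch of that comparison:

/* Minimal sketch of the candidate comparison: lower priority value
 * wins; ties are broken by the lower pos (the older policy). */
#include <stddef.h>

struct policy {
	unsigned int priority;  /* lower value = higher precedence */
	unsigned int pos;       /* insertion order, lower = older */
};

static const struct policy *pick(const struct policy *best,
				 const struct policy *cand)
{
	if (!best)
		return cand;
	if (cand->priority < best->priority)
		return cand;
	if (cand->priority == best->priority && cand->pos < best->pos)
		return cand;
	return best;
}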
@@ -196,8 +201,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net,
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
bool excl);
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy);
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
@@ -410,7 +413,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
- INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
@@ -1228,26 +1230,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
return ERR_PTR(-EEXIST);
}
- chain = &net->xfrm.policy_inexact[dir];
- xfrm_policy_insert_inexact_list(chain, policy);
-
if (delpol)
__xfrm_policy_inexact_prune_bin(bin, false);
return delpol;
}
+static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy)
+{
+ int dir;
+
+ if (policy->walk.dead)
+ return true;
+
+ dir = xfrm_policy_id2dir(policy->index);
+ return dir >= XFRM_POLICY_MAX;
+}
+
static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
xfrm.policy_hthresh.work);
- unsigned int hmask;
struct xfrm_policy *pol;
struct xfrm_policy *policy;
struct hlist_head *chain;
- struct hlist_head *odst;
struct hlist_node *newpos;
- int i;
int dir;
unsigned seq;
u8 lbits4, rbits4, lbits6, rbits6;
@@ -1274,13 +1281,10 @@ static void xfrm_hash_rebuild(struct work_struct *work)
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
- if (policy->walk.dead)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
dir = xfrm_policy_id2dir(policy->index);
- if (dir >= XFRM_POLICY_MAX)
- continue;
-
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
if (policy->family == AF_INET) {
dbits = rbits4;
@@ -1311,23 +1315,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
goto out_unlock;
}
- /* reset the bydst and inexact table in all directions */
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
- struct hlist_node *n;
-
- hlist_for_each_entry_safe(policy, n,
- &net->xfrm.policy_inexact[dir],
- bydst_inexact_list) {
- hlist_del_rcu(&policy->bydst);
- hlist_del_init(&policy->bydst_inexact_list);
- }
-
- hmask = net->xfrm.policy_bydst[dir].hmask;
- odst = net->xfrm.policy_bydst[dir].table;
- for (i = hmask; i >= 0; i--) {
- hlist_for_each_entry_safe(policy, n, odst + i, bydst)
- hlist_del_rcu(&policy->bydst);
- }
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
/* dir out => dst = remote, src = local */
net->xfrm.policy_bydst[dir].dbits4 = rbits4;
@@ -1345,14 +1333,13 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
- if (policy->walk.dead)
+ if (xfrm_policy_is_dead_or_sk(policy))
continue;
- dir = xfrm_policy_id2dir(policy->index);
- if (dir >= XFRM_POLICY_MAX) {
- /* skip socket policies */
- continue;
- }
+
+ hlist_del_rcu(&policy->bydst);
+
newpos = NULL;
+ dir = xfrm_policy_id2dir(policy->index);
chain = policy_hash_bysel(net, &policy->selector,
policy->family, dir);
@@ -1519,42 +1506,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = {
.automatic_shrinking = true,
};
-static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
- struct xfrm_policy *policy)
-{
- struct xfrm_policy *pol, *delpol = NULL;
- struct hlist_node *newpos = NULL;
- int i = 0;
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if (pol->type == policy->type &&
- pol->if_id == policy->if_id &&
- !selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(&policy->mark, pol) &&
- xfrm_sec_ctx_match(pol->security, policy->security) &&
- !WARN_ON(delpol)) {
- delpol = pol;
- if (policy->priority > pol->priority)
- continue;
- } else if (policy->priority >= pol->priority) {
- newpos = &pol->bydst_inexact_list;
- continue;
- }
- if (delpol)
- break;
- }
-
- if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
- hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
- else
- hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
-
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- pol->pos = i;
- i++;
- }
-}
-
static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
struct xfrm_policy *policy,
bool excl)
@@ -2294,10 +2245,52 @@ out:
return pol;
}
+static u32 xfrm_gen_pos_slow(struct net *net)
+{
+ struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* oldest entry is last in list */
+ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+ if (!xfrm_policy_is_dead_or_sk(policy))
+ policy->pos = ++i;
+ }
+
+ return i;
+}
+
+static u32 xfrm_gen_pos(struct net *net)
+{
+ const struct xfrm_policy *policy;
+ u32 i = 0;
+
+ /* most recently added policy is at the head of the list */
+ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
+ if (xfrm_policy_is_dead_or_sk(policy))
+ continue;
+
+ if (policy->pos == UINT_MAX)
+ return xfrm_gen_pos_slow(net);
+
+ i = policy->pos + 1;
+ break;
+ }
+
+ return i;
+}
+
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
+ switch (dir) {
+ case XFRM_POLICY_IN:
+ case XFRM_POLICY_FWD:
+ case XFRM_POLICY_OUT:
+ pol->pos = xfrm_gen_pos(net);
+ break;
+ }
+
list_add(&pol->walk.all, &net->xfrm.policy_all);
net->xfrm.policy_count[dir]++;
xfrm_pol_hold(pol);
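
__xfrm_policy_link() now stamps every non-socket policy with a pos one greater than that of the newest live policy; if the counter would pass UINT_MAX, xfrm_gen_pos_slow() renumbers all live policies from the oldest upward and the freshly linked policy takes the returned value. A compact model of the scheme, with the kernel's list_head iteration replaced by a simple newest-first singly linked list:

/* Sketch: newest-first list, monotonically increasing pos, full
 * renumber once the counter is exhausted. List handling simplified. */
#include <limits.h>
#include <stddef.h>

struct pol {
	struct pol *next;       /* newest first, like policy_all */
	unsigned int pos;
	int dead;               /* dead or socket policy: skipped */
};

static unsigned int gen_pos_slow(struct pol *p)
{
	unsigned int i;

	if (!p)
		return 0;
	i = gen_pos_slow(p->next);      /* number oldest entries first */
	if (!p->dead)
		p->pos = ++i;
	return i;
}

static unsigned int gen_pos(struct pol *head)
{
	struct pol *p;

	for (p = head; p; p = p->next) {
		if (p->dead)
			continue;
		if (p->pos == UINT_MAX)
			return gen_pos_slow(head);
		return p->pos + 1;      /* newest live policy + 1 */
	}
	return 0;
}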
@@ -2314,7 +2307,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
- hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}
@@ -2561,7 +2553,7 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
static int xfrm_get_tos(const struct flowi *fl, int family)
{
if (family == AF_INET)
- return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
+ return fl->u.ip4.flowi4_tos & INET_DSCP_MASK;
return 0;
}
@@ -4437,63 +4429,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif
#ifdef CONFIG_XFRM_MIGRATE
-static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
- const struct xfrm_selector *sel_tgt)
-{
- if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
- if (sel_tgt->family == sel_cmp->family &&
- xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
- sel_cmp->family) &&
- xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
- sel_cmp->family) &&
- sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
- sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
- return true;
- }
- } else {
- if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
- return true;
- }
- }
- return false;
-}
-
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
u8 dir, u8 type, struct net *net, u32 if_id)
{
- struct xfrm_policy *pol, *ret = NULL;
- struct hlist_head *chain;
- u32 priority = ~0U;
+ struct xfrm_policy *pol;
+ struct flowi fl;
- spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
- hlist_for_each_entry(pol, chain, bydst) {
- if ((if_id == 0 || pol->if_id == if_id) &&
- xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
- priority = ret->priority;
- break;
- }
- }
- chain = &net->xfrm.policy_inexact[dir];
- hlist_for_each_entry(pol, chain, bydst_inexact_list) {
- if ((pol->priority >= priority) && ret)
- break;
+ memset(&fl, 0, sizeof(fl));
- if ((if_id == 0 || pol->if_id == if_id) &&
- xfrm_migrate_selector_match(sel, &pol->selector) &&
- pol->type == type) {
- ret = pol;
+ fl.flowi_proto = sel->proto;
+
+ switch (sel->family) {
+ case AF_INET:
+ fl.u.ip4.saddr = sel->saddr.a4;
+ fl.u.ip4.daddr = sel->daddr.a4;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
break;
- }
+ fl.u.flowi4_oif = sel->ifindex;
+ fl.u.ip4.fl4_sport = sel->sport;
+ fl.u.ip4.fl4_dport = sel->dport;
+ break;
+ case AF_INET6:
+ fl.u.ip6.saddr = sel->saddr.in6;
+ fl.u.ip6.daddr = sel->daddr.in6;
+ if (sel->proto == IPSEC_ULPROTO_ANY)
+ break;
+ fl.u.flowi6_oif = sel->ifindex;
+ fl.u.ip6.fl4_sport = sel->sport;
+ fl.u.ip6.fl4_dport = sel->dport;
+ break;
+ default:
+ return ERR_PTR(-EAFNOSUPPORT);
}
- xfrm_pol_hold(ret);
+ rcu_read_lock();
- spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+ pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id);
+ if (IS_ERR_OR_NULL(pol))
+ goto out_unlock;
- return ret;
+ if (!xfrm_pol_hold_rcu(pol))
+ pol = NULL;
+out_unlock:
+ rcu_read_unlock();
+ return pol;
}
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
@@ -4630,9 +4609,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
/* Stage 1 - find policy */
pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
- if (!pol) {
+ if (IS_ERR_OR_NULL(pol)) {
NL_SET_ERR_MSG(extack, "Target policy not found");
- err = -ENOENT;
+ err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT;
goto out;
}