summaryrefslogtreecommitdiff
path: root/include/net/rps.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-14 18:36:10 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-14 18:36:10 -0700
commit91a4855d6c03e770e42f17c798a36a3c46e63de2 (patch)
tree5103bfe3aea2aab7e8b358c5c9329539508f648d /include/net/rps.h
parentf5ad4101009e7f5f5984ffea6923d4fcd470932a (diff)
parent35c2c39832e569449b9192fa1afbbc4c66227af7 (diff)
downloadlwn-91a4855d6c03e770e42f17c798a36a3c46e63de2.tar.gz
lwn-91a4855d6c03e770e42f17c798a36a3c46e63de2.zip
Merge tag 'net-next-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core & protocols: - Support HW queue leasing, allowing containers to be granted access to HW queues for zero-copy operations and AF_XDP - Number of code moves to help the compiler with inlining. Avoid output arguments for returning drop reason where possible - Rework drop handling within qdiscs to include more metadata about the reason and dropping qdisc in the tracepoints - Remove the rtnl_lock use from IP Multicast Routing - Pack size information into the Rx Flow Steering table pointer itself. This allows making the table itself a flat array of u32s, thus making the table allocation size a power of two - Report TCP delayed ack timer information via socket diag - Add ip_local_port_step_width sysctl to allow distributing the randomly selected ports more evenly throughout the allowed space - Add support for per-route tunsrc in IPv6 segment routing - Start work of switching sockopt handling to iov_iter - Improve dynamic recvbuf sizing in MPTCP, limit burstiness and avoid buffer size drifting up - Support MSG_EOR in MPTCP - Add stp_mode attribute to the bridge driver for STP mode selection. This addresses concerns about call_usermodehelper() usage - Remove UDP-Lite support (as announced in 2023) - Remove support for building IPv6 as a module. Remove the now unnecessary function calling indirection Cross-tree stuff: - Move Michael MIC code from generic crypto into wireless, it's considered insecure but some WiFi networks still need it Netfilter: - Switch nft_fib_ipv6 module to no longer need temporary dst_entry object allocations by using fib6_lookup() + RCU. Florian W reports this gets us ~13% higher packet rate - Convert IPVS's global __ip_vs_mutex to per-net service_mutex and switch the service tables to be per-net. Convert some code that walks the service lists to use RCU instead of the service_mutex - Add more opinionated input validation to lower security exposure - Make IPVS hash tables to be per-netns and resizable Wireless: - Finished assoc frame encryption/EPPKE/802.1X-over-auth - Radar detection improvements - Add 6 GHz incumbent signal detection APIs - Multi-link support for FILS, probe response templates and client probing - New APIs and mac80211 support for NAN (Neighbor Aware Networking, aka Wi-Fi Aware) so less work must be in firmware Driver API: - Add numerical ID for devlink instances (to avoid having to create fake bus/device pairs just to have an ID). Support shared devlink instances which span multiple PFs - Add standard counters for reporting pause storm events (implement in mlx5 and fbnic) - Add configuration API for completion writeback buffering (implement in mana) - Support driver-initiated change of RSS context sizes - Support DPLL monitoring input frequency (implement in zl3073x) - Support per-port resources in devlink (implement in mlx5) Misc: - Expand the YAML spec for Netfilter Drivers - Software: - macvlan: support multicast rx for bridge ports with shared source MAC address - team: decouple receive and transmit enablement for IEEE 802.3ad LACP "independent control" - Ethernet high-speed NICs: - nVidia/Mellanox: - support high order pages in zero-copy mode (for payload coalescing) - support multiple packets in a page (for systems with 64kB pages) - Broadcom 25-400GE (bnxt): - implement XDP RSS hash metadata extraction - add software fallback for UDP GSO, lowering the IOMMU cost - Broadcom 800GE (bnge): - add link status and configuration handling - add various HW and SW statistics - Marvell/Cavium: - NPC HW block support for cn20k - Huawei (hinic3): - add mailbox / control queue - add rx VLAN offload - add driver info and link management - Ethernet NICs: - Marvell/Aquantia: - support reading SFP module info on some AQC100 cards - Realtek PCI (r8169): - add support for RTL8125cp - Realtek USB (r8152): - support for the RTL8157 5Gbit chip - add 2500baseT EEE status/configuration support - Ethernet NICs embedded and off-the-shelf IP: - Synopsys (stmmac): - cleanup and reorganize SerDes handling and PCS support - cleanup descriptor handling and per-platform data - cleanup and consolidate MDIO defines and handling - shrink driver memory use for internal structures - improve Tx IRQ coalescing - improve TCP segmentation handling - add support for Spacemit K3 - Cadence (macb): - support PHYs that have inband autoneg disabled with GEM - support IEEE 802.3az EEE - rework usrio capabilities and handling - AMD (xgbe): - improve power management for S0i3 - improve TX resilience for link-down handling - Virtual: - Google cloud vNIC: - support larger ring sizes in DQO-QPL mode - improve HW-GRO handling - support UDP GSO for DQO format - PCIe NTB: - support queue count configuration - Ethernet PHYs: - automatically disable PHY autonomous EEE if MAC is in charge - Broadcom: - add BCM84891/BCM84892 support - Micrel: - support for LAN9645X internal PHY - Realtek: - add RTL8224 pair order support - support PHY LEDs on RTL8211F-VD - support spread spectrum clocking (SSC) - Maxlinear: - add PHY-level statistics via ethtool - Ethernet switches: - Maxlinear (mxl862xx): - support for bridge offloading - support for VLANs - support driver statistics - Bluetooth: - large number of fixes and new device IDs - Mediatek: - support MT6639 (MT7927) - support MT7902 SDIO - WiFi: - Intel (iwlwifi): - UNII-9 and continuing UHR work - MediaTek (mt76): - mt7996/mt7925 MLO fixes/improvements - mt7996 NPU support (HW eth/wifi traffic offload) - Qualcomm (ath12k): - monitor mode support on IPQ5332 - basic hwmon temperature reporting - support IPQ5424 - Realtek: - add USB RX aggregation to improve performance - add USB TX flow control by tracking in-flight URBs - Cellular: - IPA v5.2 support" * tag 'net-next-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1561 commits) net: pse-pd: fix kernel-doc function name for pse_control_find_by_id() wireguard: device: use exit_rtnl callback instead of manual rtnl_lock in pre_exit wireguard: allowedips: remove redundant space tools: ynl: add sample for wireguard wireguard: allowedips: Use kfree_rcu() instead of call_rcu() MAINTAINERS: Add netkit selftest files selftests/net: Add additional test coverage in nk_qlease selftests/net: Split netdevsim tests from HW tests in nk_qlease tools/ynl: Make YnlFamily closeable as a context manager net: airoha: Add missing PPE configurations in airoha_ppe_hw_init() net: airoha: Fix VIP configuration for AN7583 SoC net: caif: clear client service pointer on teardown net: strparser: fix skb_head leak in strp_abort_strp() net: usb: cdc-phonet: fix skb frags[] overflow in rx_complete() selftests/bpf: add test for xdp_master_redirect with bond not up net, bpf: fix null-ptr-deref in xdp_master_redirect() for down master net: airoha: Remove PCE_MC_EN_MASK bit in REG_FE_PCE_CFG configuration sctp: disable BH before calling udp_tunnel_xmit_skb() sctp: fix missing encap_port propagation for GSO fragments net: airoha: Rely on net_device pointer in ETS callbacks ...
Diffstat (limited to 'include/net/rps.h')
-rw-r--r--include/net/rps.h49
1 files changed, 19 insertions, 30 deletions
diff --git a/include/net/rps.h b/include/net/rps.h
index f1794cd2e7fb..e33c6a2fa8bb 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -8,6 +8,7 @@
#include <net/hotdata.h>
#ifdef CONFIG_RPS
+#include <net/rps-types.h>
extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;
@@ -39,17 +40,6 @@ struct rps_dev_flow {
#define RPS_NO_FILTER 0xffff
/*
- * The rps_dev_flow_table structure contains a table of flow mappings.
- */
-struct rps_dev_flow_table {
- u8 log;
- struct rcu_head rcu;
- struct rps_dev_flow flows[];
-};
-#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
- ((_num) * sizeof(struct rps_dev_flow)))
-
-/*
* The rps_sock_flow_table contains mappings of flows to the last CPU
* on which they were processed by the application (set in recvmsg).
* Each entry is a 32bit value. Upper part is the high-order bits
@@ -60,41 +50,38 @@ struct rps_dev_flow_table {
* meaning we use 32-6=26 bits for the hash.
*/
struct rps_sock_flow_table {
- struct rcu_head rcu;
- u32 mask;
-
- u32 ents[] ____cacheline_aligned_in_smp;
+ u32 ent;
};
-#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
#define RPS_NO_CPU 0xffff
-static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
- u32 hash)
+static inline void rps_record_sock_flow(rps_tag_ptr tag_ptr, u32 hash)
{
- unsigned int index = hash & table->mask;
+ unsigned int index = hash & rps_tag_to_mask(tag_ptr);
u32 val = hash & ~net_hotdata.rps_cpu_mask;
+ struct rps_sock_flow_table *table;
/* We only give a hint, preemption can change CPU under us */
val |= raw_smp_processor_id();
+ table = rps_tag_to_table(tag_ptr);
/* The following WRITE_ONCE() is paired with the READ_ONCE()
* here, and another one in get_rps_cpu().
*/
- if (READ_ONCE(table->ents[index]) != val)
- WRITE_ONCE(table->ents[index], val);
+ if (READ_ONCE(table[index].ent) != val)
+ WRITE_ONCE(table[index].ent, val);
}
static inline void _sock_rps_record_flow_hash(__u32 hash)
{
- struct rps_sock_flow_table *sock_flow_table;
+ rps_tag_ptr tag_ptr;
if (!hash)
return;
rcu_read_lock();
- sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
- if (sock_flow_table)
- rps_record_sock_flow(sock_flow_table, hash);
+ tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table);
+ if (tag_ptr)
+ rps_record_sock_flow(tag_ptr, hash);
rcu_read_unlock();
}
@@ -121,6 +108,7 @@ static inline void _sock_rps_record_flow(const struct sock *sk)
static inline void _sock_rps_delete_flow(const struct sock *sk)
{
struct rps_sock_flow_table *table;
+ rps_tag_ptr tag_ptr;
u32 hash, index;
hash = READ_ONCE(sk->sk_rxhash);
@@ -128,11 +116,12 @@ static inline void _sock_rps_delete_flow(const struct sock *sk)
return;
rcu_read_lock();
- table = rcu_dereference(net_hotdata.rps_sock_flow_table);
- if (table) {
- index = hash & table->mask;
- if (READ_ONCE(table->ents[index]) != RPS_NO_CPU)
- WRITE_ONCE(table->ents[index], RPS_NO_CPU);
+ tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table);
+ if (tag_ptr) {
+ index = hash & rps_tag_to_mask(tag_ptr);
+ table = rps_tag_to_table(tag_ptr);
+ if (READ_ONCE(table[index].ent) != RPS_NO_CPU)
+ WRITE_ONCE(table[index].ent, RPS_NO_CPU);
}
rcu_read_unlock();
}