diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-11 10:55:49 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-11 10:55:49 -0700 |
commit | 237f83dfbe668443b5e31c3c7576125871cca674 (patch) | |
tree | 11848a8d0aa414a1d3ce2024e181071b1d9dea08 /include/net | |
parent | 8f6ccf6159aed1f04c6d179f61f6fb2691261e84 (diff) | |
parent | 1ff2f0fa450ea4e4f87793d9ed513098ec6e12be (diff) | |
download | lwn-237f83dfbe668443b5e31c3c7576125871cca674.tar.gz lwn-237f83dfbe668443b5e31c3c7576125871cca674.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Some highlights from this development cycle:
1) Big refactoring of ipv6 route and neigh handling to support
nexthop objects configurable as units from userspace. From David
Ahern.
2) Convert explored_states in BPF verifier into a hash table,
significantly decreased state held for programs with bpf2bpf
calls, from Alexei Starovoitov.
3) Implement bpf_send_signal() helper, from Yonghong Song.
4) Various classifier enhancements to mvpp2 driver, from Maxime
Chevallier.
5) Add aRFS support to hns3 driver, from Jian Shen.
6) Fix use after free in inet frags by allocating fqdirs dynamically
and reworking how rhashtable dismantle occurs, from Eric Dumazet.
7) Add act_ctinfo packet classifier action, from Kevin
Darbyshire-Bryant.
8) Add TFO key backup infrastructure, from Jason Baron.
9) Remove several old and unused ISDN drivers, from Arnd Bergmann.
10) Add devlink notifications for flash update status to mlxsw driver,
from Jiri Pirko.
11) Lots of kTLS offload infrastructure fixes, from Jakub Kicinski.
12) Add support for mv88e6250 DSA chips, from Rasmus Villemoes.
13) Various enhancements to ipv6 flow label handling, from Eric
Dumazet and Willem de Bruijn.
14) Support TLS offload in nfp driver, from Jakub Kicinski, Dirk van
der Merwe, and others.
15) Various improvements to axienet driver including converting it to
phylink, from Robert Hancock.
16) Add PTP support to sja1105 DSA driver, from Vladimir Oltean.
17) Add mqprio qdisc offload support to dpaa2-eth, from Ioana
Radulescu.
18) Add devlink health reporting to mlx5, from Moshe Shemesh.
19) Convert stmmac over to phylink, from Jose Abreu.
20) Add PTP PHC (Physical Hardware Clock) support to mlxsw, from
Shalom Toledo.
21) Add nftables SYNPROXY support, from Fernando Fernandez Mancera.
22) Convert tcp_fastopen over to use SipHash, from Ard Biesheuvel.
23) Track spill/fill of constants in BPF verifier, from Alexei
Starovoitov.
24) Support bounded loops in BPF, from Alexei Starovoitov.
25) Various page_pool API fixes and improvements, from Jesper Dangaard
Brouer.
26) Just like ipv4, support ref-countless ipv6 route handling. From
Wei Wang.
27) Support VLAN offloading in aquantia driver, from Igor Russkikh.
28) Add AF_XDP zero-copy support to mlx5, from Maxim Mikityanskiy.
29) Add flower GRE encap/decap support to nfp driver, from Pieter
Jansen van Vuuren.
30) Protect against stack overflow when using act_mirred, from John
Hurley.
31) Allow devmap map lookups from eBPF, from Toke Høiland-Jørgensen.
32) Use page_pool API in netsec driver, Ilias Apalodimas.
33) Add Google gve network driver, from Catherine Sullivan.
34) More indirect call avoidance, from Paolo Abeni.
35) Add kTLS TX HW offload support to mlx5, from Tariq Toukan.
36) Add XDP_REDIRECT support to bnxt_en, from Andy Gospodarek.
37) Add MPLS manipulation actions to TC, from John Hurley.
38) Add sending a packet to connection tracking from TC actions, and
then allow flower classifier matching on conntrack state. From
Paul Blakey.
39) Netfilter hw offload support, from Pablo Neira Ayuso"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2080 commits)
net/mlx5e: Return in default case statement in tx_post_resync_params
mlx5: Return -EINVAL when WARN_ON_ONCE triggers in mlx5e_tls_resync().
net: dsa: add support for BRIDGE_MROUTER attribute
pkt_sched: Include const.h
net: netsec: remove static declaration for netsec_set_tx_de()
net: netsec: remove superfluous if statement
netfilter: nf_tables: add hardware offload support
net: flow_offload: rename tc_cls_flower_offload to flow_cls_offload
net: flow_offload: add flow_block_cb_is_busy() and use it
net: sched: remove tcf block API
drivers: net: use flow block API
net: sched: use flow block API
net: flow_offload: add flow_block_cb_{priv, incref, decref}()
net: flow_offload: add list handling functions
net: flow_offload: add flow_block_cb_alloc() and flow_block_cb_free()
net: flow_offload: rename TCF_BLOCK_BINDER_TYPE_* to FLOW_BLOCK_BINDER_TYPE_*
net: flow_offload: rename TC_BLOCK_{UN}BIND to FLOW_BLOCK_{UN}BIND
net: flow_offload: add flow_block_cb_setup_simple()
net: hisilicon: Add an tx_desc to adapt HI13X1_GMAC
net: hisilicon: Add an rx_desc to adapt HI13X1_GMAC
...
Diffstat (limited to 'include/net')
61 files changed, 1634 insertions, 350 deletions
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 9a5330eed794..5bc1e30dedde 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1143,6 +1143,26 @@ struct hci_cp_write_sc_support { __u8 support; } __packed; +#define HCI_OP_READ_AUTH_PAYLOAD_TO 0x0c7b +struct hci_cp_read_auth_payload_to { + __le16 handle; +} __packed; +struct hci_rp_read_auth_payload_to { + __u8 status; + __le16 handle; + __le16 timeout; +} __packed; + +#define HCI_OP_WRITE_AUTH_PAYLOAD_TO 0x0c7c +struct hci_cp_write_auth_payload_to { + __le16 handle; + __le16 timeout; +} __packed; +struct hci_rp_write_auth_payload_to { + __u8 status; + __le16 handle; +} __packed; + #define HCI_OP_READ_LOCAL_OOB_EXT_DATA 0x0c7d struct hci_rp_read_local_oob_ext_data { __u8 status; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 05b1b96f4d9e..ded574b32c20 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -199,6 +199,8 @@ struct adv_info { /* Default min/max age of connection information (1s/3s) */ #define DEFAULT_CONN_INFO_MIN_AGE 1000 #define DEFAULT_CONN_INFO_MAX_AGE 3000 +/* Default authenticated payload timeout 30s */ +#define DEFAULT_AUTH_PAYLOAD_TIMEOUT 0x0bb8 struct amp_assoc { __u16 len; @@ -275,6 +277,7 @@ struct hci_dev { __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; + __u16 auth_payload_timeout; __u8 ssp_debug_mode; __u8 hw_error_code; __u32 clock; @@ -481,6 +484,7 @@ struct hci_conn { __u16 disc_timeout; __u16 conn_timeout; __u16 setting; + __u16 auth_payload_timeout; __u16 le_conn_min_interval; __u16 le_conn_max_interval; __u16 le_conn_interval; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 2a05cc349018..9d382f2f0bc5 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -63,6 +63,7 @@ enum { BOND_OPT_AD_ACTOR_SYSTEM, BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_NUM_PEER_NOTIF_ALIAS, + BOND_OPT_PEER_NOTIF_DELAY, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index b46d68acf701..f7fe45689142 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -38,6 +38,15 @@ #define __long_aligned __attribute__((aligned((sizeof(long))))) #endif +#define slave_info(bond_dev, slave_dev, fmt, ...) \ + netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) +#define slave_warn(bond_dev, slave_dev, fmt, ...) \ + netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) +#define slave_dbg(bond_dev, slave_dev, fmt, ...) \ + netdev_dbg(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) +#define slave_err(bond_dev, slave_dev, fmt, ...) \ + netdev_err(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) + #define BOND_MODE(bond) ((bond)->params.mode) /* slave list primitives */ @@ -114,6 +123,7 @@ struct bond_params { int fail_over_mac; int updelay; int downdelay; + int peer_notif_delay; int lacp_fast; unsigned int min_links; int ad_select; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1fce25b1d87f..88c27153a4bc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -379,16 +379,18 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband, } /** - * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA - * @sband: the sband to search for the STA on + * ieee80211_get_he_iftype_cap - return HE capabilities for an sband's iftype + * @sband: the sband to search for the iftype on + * @iftype: enum nl80211_iftype * * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found */ static inline const struct ieee80211_sta_he_cap * -ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband) +ieee80211_get_he_iftype_cap(const struct ieee80211_supported_band *sband, + u8 iftype) { const struct ieee80211_sband_iftype_data *data = - ieee80211_get_sband_iftype_data(sband, NL80211_IFTYPE_STATION); + ieee80211_get_sband_iftype_data(sband, iftype); if (data && data->he_cap.has_he) return &data->he_cap; @@ -397,6 +399,18 @@ ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband) } /** + * ieee80211_get_he_sta_cap - return HE capabilities for an sband's STA + * @sband: the sband to search for the STA on + * + * Return: pointer to the struct ieee80211_sta_he_cap, or NULL is none found + */ +static inline const struct ieee80211_sta_he_cap * +ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband) +{ + return ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_STATION); +} + +/** * wiphy_read_of_freq_limits - read frequency limits from device tree * * @wiphy: the wireless device to get extra limits for @@ -739,6 +753,9 @@ struct survey_info { * CFG80211_MAX_WEP_KEYS WEP keys * @wep_tx_key: key index (0..3) of the default TX static WEP key * @psk: PSK (for devices supporting 4-way-handshake offload) + * @sae_pwd: password for SAE authentication (for devices supporting SAE + * offload) + * @sae_pwd_len: length of SAE password (for devices supporting SAE offload) */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -754,6 +771,8 @@ struct cfg80211_crypto_settings { struct key_params *wep_keys; int wep_tx_key; const u8 *psk; + const u8 *sae_pwd; + u8 sae_pwd_len; }; /** @@ -875,6 +894,7 @@ enum cfg80211_ap_settings_flags { * @he_cap: HE capabilities (or %NULL if HE isn't enabled) * @ht_required: stations must support HT * @vht_required: stations must support VHT + * @twt_responder: Enable Target Wait Time * @flags: flags, as defined in enum cfg80211_ap_settings_flags */ struct cfg80211_ap_settings { @@ -901,6 +921,7 @@ struct cfg80211_ap_settings { const struct ieee80211_vht_cap *vht_cap; const struct ieee80211_he_cap_elem *he_cap; bool ht_required, vht_required; + bool twt_responder; u32 flags; }; @@ -4149,6 +4170,8 @@ struct sta_opmode_info { u8 rx_nss; }; +#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)ERR_PTR(-ENODATA)) + /** * struct wiphy_vendor_command - vendor command definition * @info: vendor command identifying information, as used in nl80211 @@ -4159,6 +4182,10 @@ struct sta_opmode_info { * @dumpit: dump callback, for transferring bigger/multiple items. The * @storage points to cb->args[5], ie. is preserved over the multiple * dumpit calls. + * @policy: policy pointer for attributes within %NL80211_ATTR_VENDOR_DATA. + * Set this to %VENDOR_CMD_RAW_DATA if no policy can be given and the + * attribute is just raw data (e.g. a firmware command). + * @maxattr: highest attribute number in policy * It's recommended to not have the same sub command with both @doit and * @dumpit, so that userspace can assume certain ones are get and others * are used with dump requests. @@ -4171,6 +4198,8 @@ struct wiphy_vendor_command { int (*dumpit)(struct wiphy *wiphy, struct wireless_dev *wdev, struct sk_buff *skb, const void *data, int data_len, unsigned long *storage); + const struct nla_policy *policy; + unsigned int maxattr; }; /** @@ -5719,6 +5748,26 @@ void cfg80211_put_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); */ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); +/** + * cfg80211_bss_iter - iterate all BSS entries + * + * This function iterates over the BSS entries associated with the given wiphy + * and calls the callback for the iterated BSS. The iterator function is not + * allowed to call functions that might modify the internal state of the BSS DB. + * + * @wiphy: the wiphy + * @chandef: if given, the iterator function will be called only if the channel + * of the currently iterated BSS is a subset of the given channel. + * @iter: the iterator function to call + * @iter_data: an argument to the iterator function + */ +void cfg80211_bss_iter(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + void (*iter)(struct wiphy *wiphy, + struct cfg80211_bss *bss, + void *data), + void *iter_data); + static inline enum nl80211_bss_scan_width cfg80211_chandef_to_scan_width(const struct cfg80211_chan_def *chandef) { @@ -6229,8 +6278,11 @@ struct cfg80211_fils_resp_params { * case. * @bssid: The BSSID of the AP (may be %NULL) * @bss: Entry of bss to which STA got connected to, can be obtained through - * cfg80211_get_bss() (may be %NULL). Only one parameter among @bssid and - * @bss needs to be specified. + * cfg80211_get_bss() (may be %NULL). But it is recommended to store the + * bss from the connect_request and hold a reference to it and return + * through this param to avoid a warning if the bss is expired during the + * connection, esp. for those drivers implementing connect op. + * Only one parameter among @bssid and @bss needs to be specified. * @req_ie: Association request IEs (may be %NULL) * @req_ie_len: Association request IEs length * @resp_ie: Association response IEs (may be %NULL) @@ -6278,8 +6330,12 @@ void cfg80211_connect_done(struct net_device *dev, * * @dev: network device * @bssid: the BSSID of the AP - * @bss: entry of bss to which STA got connected to, can be obtained - * through cfg80211_get_bss (may be %NULL) + * @bss: Entry of bss to which STA got connected to, can be obtained through + * cfg80211_get_bss() (may be %NULL). But it is recommended to store the + * bss from the connect_request and hold a reference to it and return + * through this param to avoid a warning if the bss is expired during the + * connection, esp. for those drivers implementing connect op. + * Only one parameter among @bssid and @bss needs to be specified. * @req_ie: association request IEs (maybe be %NULL) * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) @@ -6490,6 +6546,16 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, gfp_t gfp); /** + * cfg80211_tx_mgmt_expired - tx_mgmt duration expired + * @wdev: wireless device + * @cookie: the requested cookie + * @chan: The current channel (from tx_mgmt request) + * @gfp: allocation flags + */ +void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie, + struct ieee80211_channel *chan, gfp_t gfp); + +/** * cfg80211_sinfo_alloc_tid_stats - allocate per-tid statistics. * * @sinfo: the station information diff --git a/include/net/devlink.h b/include/net/devlink.h index c9fbeb5b701f..bc36f942a7d5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -13,6 +13,7 @@ #include <linux/list.h> #include <linux/netdevice.h> #include <linux/spinlock.h> +#include <linux/workqueue.h> #include <net/net_namespace.h> #include <uapi/linux/devlink.h> @@ -37,14 +38,34 @@ struct devlink { char priv[0] __aligned(NETDEV_ALIGN); }; +struct devlink_port_phys_attrs { + u32 port_number; /* Same value as "split group". + * A physical port which is visible to the user + * for a given port flavour. + */ + u32 split_subport_number; +}; + +struct devlink_port_pci_pf_attrs { + u16 pf; /* Associated PCI PF for this port. */ +}; + +struct devlink_port_pci_vf_attrs { + u16 pf; /* Associated PCI PF for this port. */ + u16 vf; /* Associated PCI VF for of the PCI PF for this port. */ +}; + struct devlink_port_attrs { u8 set:1, split:1, switch_port:1; enum devlink_port_flavour flavour; - u32 port_number; /* same value as "split group" */ - u32 split_subport_number; struct netdev_phys_item_id switch_id; + union { + struct devlink_port_phys_attrs phys; + struct devlink_port_pci_pf_attrs pci_pf; + struct devlink_port_pci_vf_attrs pci_vf; + }; }; struct devlink_port { @@ -60,6 +81,7 @@ struct devlink_port { enum devlink_port_type desired_type; void *type_dev; struct devlink_port_attrs attrs; + struct delayed_work type_warn_dw; }; struct devlink_sb_pool_info { @@ -526,8 +548,10 @@ struct devlink_ops { int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode); int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode, struct netlink_ext_ack *extack); - int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode); - int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode, + int (*eswitch_encap_mode_get)(struct devlink *devlink, + enum devlink_eswitch_encap_mode *p_encap_mode); + int (*eswitch_encap_mode_set)(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap_mode, struct netlink_ext_ack *extack); int (*info_get)(struct devlink *devlink, struct devlink_info_req *req, struct netlink_ext_ack *extack); @@ -586,6 +610,13 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, u32 split_subport_number, const unsigned char *switch_id, unsigned char switch_id_len); +void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, + const unsigned char *switch_id, + unsigned char switch_id_len, u16 pf); +void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, + const unsigned char *switch_id, + unsigned char switch_id_len, + u16 pf, u16 vf); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -735,6 +766,14 @@ void devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, enum devlink_health_reporter_state state); +void devlink_flash_update_begin_notify(struct devlink *devlink); +void devlink_flash_update_end_notify(struct devlink *devlink); +void devlink_flash_update_status_notify(struct devlink *devlink, + const char *status_msg, + const char *component, + unsigned long done, + unsigned long total); + #if IS_ENABLED(CONFIG_NET_DEVLINK) void devlink_compat_running_version(struct net_device *dev, diff --git a/include/net/dsa.h b/include/net/dsa.h index ba6dfff98196..1e8650fa8acc 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -18,6 +18,7 @@ #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/platform_data/dsa.h> +#include <linux/phylink.h> #include <net/devlink.h> #include <net/switchdev.h> @@ -180,7 +181,7 @@ struct dsa_port { struct dsa_switch *ds; unsigned int index; const char *name; - const struct dsa_port *cpu_dp; + struct dsa_port *cpu_dp; const char *mac; struct device_node *dn; unsigned int ageing_time; @@ -189,6 +190,7 @@ struct dsa_port { struct net_device *bridge_dev; struct devlink_port devlink_port; struct phylink *pl; + struct phylink_config pl_config; struct work_struct xmit_work; struct sk_buff_head xmit_queue; @@ -355,6 +357,7 @@ struct dsa_switch_ops { int port); int (*setup)(struct dsa_switch *ds); + void (*teardown)(struct dsa_switch *ds); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* diff --git a/include/net/dst.h b/include/net/dst.h index 12b31c602cb0..fe62fe2eb781 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -183,7 +183,7 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) } /* Kernel-internal feature bits that are unallocated in user space. */ -#define DST_FEATURE_ECN_CA (1 << 31) +#define DST_FEATURE_ECN_CA (1U << 31) #define DST_FEATURE_MASK (DST_FEATURE_ECN_CA) #define DST_FEATURE_ECN_MASK (DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN) @@ -302,8 +302,9 @@ static inline bool dst_hold_safe(struct dst_entry *dst) * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. + * Returns true if dst is refcounted. */ -static inline void skb_dst_force(struct sk_buff *skb) +static inline bool skb_dst_force(struct sk_buff *skb) { if (skb_dst_is_noref(skb)) { struct dst_entry *dst = skb_dst(skb); @@ -314,6 +315,8 @@ static inline void skb_dst_force(struct sk_buff *skb) skb->_skb_refdst = (unsigned long)dst; } + + return skb->_skb_refdst != 0UL; } diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b473df5b9512..eba8465e1d86 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -103,6 +103,7 @@ struct fib_rule_notifier_info { }; #define FRA_GENERIC_POLICY \ + [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \ [FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index dfabc0503446..90bd210be060 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -200,6 +200,28 @@ struct flow_dissector_key_ip { __u8 ttl; }; +/** + * struct flow_dissector_key_meta: + * @ingress_ifindex: ingress ifindex + */ +struct flow_dissector_key_meta { + int ingress_ifindex; +}; + +/** + * struct flow_dissector_key_ct: + * @ct_state: conntrack state after converting with map + * @ct_mark: conttrack mark + * @ct_zone: conntrack zone + * @ct_labels: conntrack labels + */ +struct flow_dissector_key_ct { + u16 ct_state; + u16 ct_zone; + u32 ct_mark; + u32 ct_labels[4]; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -225,14 +247,15 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CVLAN, /* struct flow_dissector_key_vlan */ FLOW_DISSECTOR_KEY_ENC_IP, /* struct flow_dissector_key_ip */ FLOW_DISSECTOR_KEY_ENC_OPTS, /* struct flow_dissector_key_enc_opts */ + FLOW_DISSECTOR_KEY_META, /* struct flow_dissector_key_meta */ + FLOW_DISSECTOR_KEY_CT, /* struct flow_dissector_key_ct */ FLOW_DISSECTOR_KEY_MAX, }; #define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0) -#define FLOW_DISSECTOR_F_STOP_AT_L3 BIT(1) -#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(2) -#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(3) +#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1) +#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2) struct flow_dissector_key { enum flow_dissector_key_id key_id; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index a2df99f9b196..db337299e81e 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -1,7 +1,9 @@ #ifndef _NET_FLOW_OFFLOAD_H #define _NET_FLOW_OFFLOAD_H +#include <linux/kernel.h> #include <net/flow_dissector.h> +#include <net/sch_generic.h> struct flow_match { struct flow_dissector *dissector; @@ -9,6 +11,10 @@ struct flow_match { void *key; }; +struct flow_match_meta { + struct flow_dissector_key_meta *key, *mask; +}; + struct flow_match_basic { struct flow_dissector_key_basic *key, *mask; }; @@ -63,6 +69,8 @@ struct flow_match_enc_opts { struct flow_rule; +void flow_rule_match_meta(const struct flow_rule *rule, + struct flow_match_meta *out); void flow_rule_match_basic(const struct flow_rule *rule, struct flow_match_basic *out); void flow_rule_match_control(const struct flow_rule *rule, @@ -122,6 +130,7 @@ enum flow_action_id { FLOW_ACTION_QUEUE, FLOW_ACTION_SAMPLE, FLOW_ACTION_POLICE, + FLOW_ACTION_CT, }; /* This is mirroring enum pedit_header_type definition for easy mapping between @@ -171,6 +180,10 @@ struct flow_action_entry { s64 burst; u64 rate_bytes_ps; } police; + struct { /* FLOW_ACTION_CT */ + int action; + u16 zone; + } ct; }; }; @@ -225,4 +238,99 @@ static inline void flow_stats_update(struct flow_stats *flow_stats, flow_stats->lastused = max_t(u64, flow_stats->lastused, lastused); } +enum flow_block_command { + FLOW_BLOCK_BIND, + FLOW_BLOCK_UNBIND, +}; + +enum flow_block_binder_type { + FLOW_BLOCK_BINDER_TYPE_UNSPEC, + FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, + FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS, +}; + +struct netlink_ext_ack; + +struct flow_block_offload { + enum flow_block_command command; + enum flow_block_binder_type binder_type; + bool block_shared; + struct net *net; + struct list_head cb_list; + struct list_head *driver_block_list; + struct netlink_ext_ack *extack; +}; + +struct flow_block_cb { + struct list_head driver_list; + struct list_head list; + struct net *net; + tc_setup_cb_t *cb; + void *cb_ident; + void *cb_priv; + void (*release)(void *cb_priv); + unsigned int refcnt; +}; + +struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb, + void *cb_ident, void *cb_priv, + void (*release)(void *cb_priv)); +void flow_block_cb_free(struct flow_block_cb *block_cb); + +struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *offload, + tc_setup_cb_t *cb, void *cb_ident); + +void *flow_block_cb_priv(struct flow_block_cb *block_cb); +void flow_block_cb_incref(struct flow_block_cb *block_cb); +unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb); + +static inline void flow_block_cb_add(struct flow_block_cb *block_cb, + struct flow_block_offload *offload) +{ + list_add_tail(&block_cb->list, &offload->cb_list); +} + +static inline void flow_block_cb_remove(struct flow_block_cb *block_cb, + struct flow_block_offload *offload) +{ + list_move(&block_cb->list, &offload->cb_list); +} + +bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident, + struct list_head *driver_block_list); + +int flow_block_cb_setup_simple(struct flow_block_offload *f, + struct list_head *driver_list, tc_setup_cb_t *cb, + void *cb_ident, void *cb_priv, bool ingress_only); + +enum flow_cls_command { + FLOW_CLS_REPLACE, + FLOW_CLS_DESTROY, + FLOW_CLS_STATS, + FLOW_CLS_TMPLT_CREATE, + FLOW_CLS_TMPLT_DESTROY, +}; + +struct flow_cls_common_offload { + u32 chain_index; + __be16 protocol; + u32 prio; + struct netlink_ext_ack *extack; +}; + +struct flow_cls_offload { + struct flow_cls_common_offload common; + enum flow_cls_command command; + unsigned long cookie; + struct flow_rule *rule; + struct flow_stats stats; + u32 classid; +}; + +static inline struct flow_rule * +flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd) +{ + return flow_cmd->rule; +} + #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/gue.h b/include/net/gue.h index fdad41469b65..3a6595bfa641 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -60,7 +60,7 @@ struct guehdr { /* Private flags in the private option extension */ -#define GUE_PFLAG_REMCSUM htonl(1 << 31) +#define GUE_PFLAG_REMCSUM htonl(1U << 31) #define GUE_PLEN_REMCSUM 4 #define GUE_PFLAGS_ALL (GUE_PFLAG_REMCSUM) diff --git a/include/net/hwbm.h b/include/net/hwbm.h index 89085e2e2da5..81643cf8a1c4 100644 --- a/include/net/hwbm.h +++ b/include/net/hwbm.h @@ -12,18 +12,18 @@ struct hwbm_pool { /* constructor called during alocation */ int (*construct)(struct hwbm_pool *bm_pool, void *buf); /* protect acces to the buffer counter*/ - spinlock_t lock; + struct mutex buf_lock; /* private data */ void *priv; }; #ifdef CONFIG_HWBM void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf); int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp); -int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp); +int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num); #else void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {} int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; } -int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp) +int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num) { return 0; } #endif /* CONFIG_HWBM */ #endif /* _HWBM_H */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 975901a95c0f..ae2ba897675c 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -25,6 +25,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern); +int inet_send_prepare(struct sock *sk); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 378904ee9129..010f26b31c89 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -3,19 +3,24 @@ #define __NET_FRAG_H__ #include <linux/rhashtable-types.h> +#include <linux/completion.h> -struct netns_frags { +/* Per netns frag queues directory */ +struct fqdir { /* sysctls */ long high_thresh; long low_thresh; int timeout; int max_dist; struct inet_frags *f; + struct net *net; + bool dead; struct rhashtable rhashtable ____cacheline_aligned_in_smp; /* Keep atomic mem on separate cachelines in structs that include it */ atomic_long_t mem ____cacheline_aligned_in_smp; + struct work_struct destroy_work; }; /** @@ -24,11 +29,13 @@ struct netns_frags { * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction + * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), + INET_FRAG_HASH_DEAD = BIT(3), }; struct frag_v4_compare_key { @@ -64,7 +71,7 @@ struct frag_v6_compare_key { * @meat: length of received fragments so far * @flags: fragment queue flags * @max_size: maximum received fragment size - * @net: namespace that this frag belongs to + * @fqdir: pointer to struct fqdir * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { @@ -84,7 +91,7 @@ struct inet_frag_queue { int meat; __u8 flags; u16 max_size; - struct netns_frags *net; + struct fqdir *fqdir; struct rcu_head rcu; }; @@ -98,21 +105,25 @@ struct inet_frags { struct kmem_cache *frags_cachep; const char *frags_cache_name; struct rhashtable_params rhash_params; + refcount_t refcnt; + struct completion completion; }; int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) +int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); + +static inline void fqdir_pre_exit(struct fqdir *fqdir) { - atomic_long_set(&nf->mem, 0); - return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); + fqdir->high_thresh = 0; /* prevent creation of new frags */ + fqdir->dead = true; } -void inet_frags_exit_net(struct netns_frags *nf); +void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); +struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. */ unsigned int inet_frag_rbtree_purge(struct rb_root *root); @@ -125,19 +136,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -static inline long frag_mem_limit(const struct netns_frags *nf) +static inline long frag_mem_limit(const struct fqdir *fqdir) { - return atomic_long_read(&nf->mem); + return atomic_long_read(&fqdir->mem); } -static inline void sub_frag_mem_limit(struct netns_frags *nf, long val) +static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val) { - atomic_long_sub(val, &nf->mem); + atomic_long_sub(val, &fqdir->mem); } -static inline void add_frag_mem_limit(struct netns_frags *nf, long val) +static inline void add_frag_mem_limit(struct fqdir *fqdir, long val) { - atomic_long_add(val, &nf->mem); + atomic_long_add(val, &fqdir->mem); } /* RFC 3168 support : diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index c2f756aedc54..aef38c140014 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -70,6 +70,7 @@ struct inet_timewait_sock { tw_flowlabel : 20, tw_pad : 2, /* 2 bits hole */ tw_tos : 8; + u32 tw_txhash; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; }; diff --git a/include/net/ip.h b/include/net/ip.h index 49c672c8cdae..29d89de39822 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -161,6 +161,44 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); + +struct ip_fraglist_iter { + struct sk_buff *frag; + struct iphdr *iph; + int offset; + unsigned int hlen; +}; + +void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, + unsigned int hlen, struct ip_fraglist_iter *iter); +void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter); + +static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter) +{ + struct sk_buff *skb = iter->frag; + + iter->frag = skb->next; + skb_mark_not_on_list(skb); + + return skb; +} + +struct ip_frag_state { + struct iphdr *iph; + unsigned int hlen; + unsigned int ll_rs; + unsigned int mtu; + unsigned int left; + int offset; + int ptr; + __be16 not_last_frag; +}; + +void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs, + unsigned int mtu, struct ip_frag_state *state); +struct sk_buff *ip_frag_next(struct sk_buff *skb, + struct ip_frag_state *state); + void ip_send_check(struct iphdr *ip); int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -241,7 +279,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, const struct ip_options *sopt, __be32 daddr, __be32 saddr, const struct ip_reply_arg *arg, - unsigned int len); + unsigned int len, u64 transmit_time); #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field) #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 855b352b660f..4b5656c71abc 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -49,6 +49,7 @@ struct fib6_config { u16 fc_delete_all_nh : 1, fc_ignore_dev_down:1, __unused : 14; + u32 fc_nh_id; struct in6_addr fc_dst; struct in6_addr fc_src; @@ -127,6 +128,9 @@ struct fib6_nh { #ifdef CONFIG_IPV6_ROUTER_PREF unsigned long last_probe; #endif + + struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; }; struct fib6_info { @@ -139,7 +143,10 @@ struct fib6_info { * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ - struct list_head fib6_siblings; + union { + struct list_head fib6_siblings; + struct list_head nh_list; + }; unsigned int fib6_nsiblings; refcount_t fib6_ref; @@ -152,22 +159,19 @@ struct fib6_info { struct rt6key fib6_src; struct rt6key fib6_prefsrc; - struct rt6_info * __percpu *rt6i_pcpu; - struct rt6_exception_bucket __rcu *rt6i_exception_bucket; - u32 fib6_metric; u8 fib6_protocol; u8 fib6_type; - u8 exception_bucket_flushed:1, - should_flush:1, + u8 should_flush:1, dst_nocount:1, dst_nopolicy:1, dst_host:1, fib6_destroying:1, - unused:2; + unused:3; - struct fib6_nh fib6_nh; struct rcu_head rcu; + struct nexthop *nh; + struct fib6_nh fib6_nh[0]; }; struct rt6_info { @@ -276,7 +280,7 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } -struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh); void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) @@ -312,6 +316,7 @@ struct fib6_walker { enum fib6_walk_state state; unsigned int skip; unsigned int count; + unsigned int skip_in_node; int (*func)(struct fib6_walker *); void *args; }; @@ -373,6 +378,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib6_info *rt; + unsigned int nsiblings; }; /* @@ -437,16 +443,22 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) rcu_read_unlock(); } -static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) -{ - return f6i->fib6_nh.fib_nh_dev; -} - int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); +int call_fib6_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib6_info *rt, + struct netlink_ext_ack *extack); +int call_fib6_multipath_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib6_info *rt, + unsigned int nsiblings, + struct netlink_ext_ack *extack); +void fib6_rt_update(struct net *net, struct fib6_info *rt, + struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); @@ -480,6 +492,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb); void fib6_update_sernum(struct net *net, struct fib6_info *rt); void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); +void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i); void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ee7405e759ba..b69c16cbbf71 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -27,6 +27,7 @@ struct route_info { #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/route.h> +#include <net/nexthop.h> #define RT6_LOOKUP_F_IFACE 0x00000001 #define RT6_LOOKUP_F_REACHABLE 0x00000002 @@ -35,6 +36,7 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 #define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 +#define RT6_LOOKUP_F_DST_NOREF 0x00000080 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header @@ -66,11 +68,14 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +/* fib entries using a nexthop object can not be coalesced into + * a multipath route + */ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { /* the RTF_ADDRCONF flag filters out RA's */ - return !(f6i->fib6_flags & RTF_ADDRCONF) && - f6i->fib6_nh.fib_nh_gw_family; + return !(f6i->fib6_flags & RTF_ADDRCONF) && !f6i->nh && + f6i->fib6_nh->fib_nh_gw_family; } void ip6_route_input(struct sk_buff *skb); @@ -79,6 +84,10 @@ struct dst_entry *ip6_route_input_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags); +struct dst_entry *ip6_route_output_flags_noref(struct net *net, + const struct sock *sk, + struct flowi6 *fl6, int flags); + struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); @@ -89,6 +98,16 @@ static inline struct dst_entry *ip6_route_output(struct net *net, return ip6_route_output_flags(net, sk, fl6, 0); } +/* Only conditionally release dst if flags indicates + * !RT6_LOOKUP_F_DST_NOREF or dst is in uncached_list. + */ +static inline void ip6_rt_put_flags(struct rt6_info *rt, int flags) +{ + if (!(flags & RT6_LOOKUP_F_DST_NOREF) || + !list_empty(&rt->rt6i_uncached)) + ip6_rt_put(rt); +} + struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags); struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, @@ -178,7 +197,7 @@ struct rt6_rtnl_dump_arg { struct fib_dump_filter filter; }; -int rt6_dump_route(struct fib6_info *f6i, void *p_arg); +int rt6_dump_route(struct fib6_info *f6i, void *p_arg, unsigned int skip); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); @@ -275,8 +294,13 @@ static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt, static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; + struct fib6_nh *nha, *nhb; + + if (a->nh || b->nh) + return nexthop_cmp(a->nh, b->nh); + nha = a->fib6_nh; + nhb = b->fib6_nh; return nha->fib_nh_dev == nhb->fib_nh_dev && ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index bbeff32fb6cb..4c81846ccce8 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -40,6 +40,7 @@ struct fib_config { u32 fc_flags; u32 fc_priority; __be32 fc_prefsrc; + u32 fc_nh_id; struct nlattr *fc_mx; struct rtnexthop *fc_mp; int fc_mx_len; @@ -125,9 +126,12 @@ struct fib_nh { * This structure contains data shared by many of routes. */ +struct nexthop; + struct fib_info { struct hlist_node fib_hash; struct hlist_node fib_lhash; + struct list_head nh_list; struct net *fib_net; int fib_treeref; refcount_t fib_clntref; @@ -146,9 +150,10 @@ struct fib_info { #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; bool fib_nh_is_v6; + bool nh_updated; + struct nexthop *nh; struct rcu_head rcu; struct fib_nh fib_nh[0]; -#define fib_dev fib_nh[0].fib_nh_dev }; @@ -185,18 +190,14 @@ struct fib_result_nl { int err; }; -static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) -{ - return &fi->fib_nh[nhsel].nh_common; -} - #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 #else #define FIB_TABLE_HASHSZ 2 #endif -__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, + unsigned char scope); __be32 fib_result_prefsrc(struct net *net, struct fib_result *res); #define FIB_RES_NHC(res) ((res).nhc) @@ -227,6 +228,7 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, int __net_init fib4_notifier_init(struct net *net); void __net_exit fib4_notifier_exit(struct net *net); +void fib_info_notify_update(struct net *net, struct nl_info *info); void fib_notify(struct net *net, struct notifier_block *nb); struct fib_table { @@ -243,6 +245,8 @@ struct fib_dump_filter { /* filter_set is an optimization that an entry is set */ bool filter_set; bool dump_all_families; + bool dump_routes; + bool dump_exceptions; unsigned char protocol; unsigned char rt_type; unsigned int flags; @@ -425,11 +429,14 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); +void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif +int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, + struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); @@ -451,11 +458,18 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID struct fib_nh_common *nhc = res->nhc; - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif - *itag = nh->nh_tclassid << 16; + if (nhc->nhc_family == AF_INET) { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + *itag = nh->nh_tclassid << 16; + } else { + *itag = 0; + } + #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) @@ -465,6 +479,7 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) #endif } +void fib_flush(struct net *net); void free_fib_info(struct fib_info *fi); static inline void fib_info_hold(struct fib_info *fi) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2ac40135b576..3759167f91f5 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -603,6 +603,7 @@ struct ip_vs_dest_user_kern { u16 tun_type; /* tunnel type */ __be16 tun_port; /* tunnel port */ + u16 tun_flags; /* tunnel flags */ }; @@ -665,6 +666,7 @@ struct ip_vs_dest { atomic_t last_weight; /* server latest weight */ __u16 tun_type; /* tunnel type */ __be16 tun_port; /* tunnel port */ + __u16 tun_flags; /* tunnel flags */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ @@ -808,11 +810,12 @@ struct ipvs_master_sync_state { struct ip_vs_sync_buff *sync_buff; unsigned long sync_queue_len; unsigned int sync_queue_delay; - struct task_struct *master_thread; struct delayed_work master_wakeup_work; struct netns_ipvs *ipvs; }; +struct ip_vs_sync_thread_data; + /* How much time to keep dests in trash */ #define IP_VS_DEST_TRASH_PERIOD (120 * HZ) @@ -943,7 +946,8 @@ struct netns_ipvs { spinlock_t sync_lock; struct ipvs_master_sync_state *ms; spinlock_t sync_buff_lock; - struct task_struct **backup_threads; + struct ip_vs_sync_thread_data *master_tinfo; + struct ip_vs_sync_thread_data *backup_tinfo; int threads_mask; volatile int sync_state; struct mutex sync_mutex; @@ -1404,6 +1408,9 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, struct ip_vs_dest * ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); +struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af, + const union nf_inet_addr *daddr, + __be16 tun_port); int ip_vs_use_count_inc(void); void ip_vs_use_count_dec(void); @@ -1497,6 +1504,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs) static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; } #endif +#define IP_VS_DFWD_METHOD(dest) (atomic_read(&(dest)->conn_flags) & \ + IP_VS_CONN_F_FWD_MASK) + /* ip_vs_fwd_tag returns the forwarding tag of the connection */ #define IP_VS_FWD_METHOD(cp) (cp->flags & IP_VS_CONN_F_FWD_MASK) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 60d9480bc4d1..8dfc65639aa4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -13,6 +13,7 @@ #include <linux/hardirq.h> #include <linux/jhash.h> #include <linux/refcount.h> +#include <linux/jump_label_ratelimit.h> #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/flow.h> @@ -150,6 +151,49 @@ struct frag_hdr { #define IP6_MF 0x0001 #define IP6_OFFSET 0xFFF8 +struct ip6_fraglist_iter { + struct ipv6hdr *tmp_hdr; + struct sk_buff *frag; + int offset; + unsigned int hlen; + __be32 frag_id; + u8 nexthdr; +}; + +int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, + u8 nexthdr, __be32 frag_id, + struct ip6_fraglist_iter *iter); +void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter); + +static inline struct sk_buff *ip6_fraglist_next(struct ip6_fraglist_iter *iter) +{ + struct sk_buff *skb = iter->frag; + + iter->frag = skb->next; + skb_mark_not_on_list(skb); + + return skb; +} + +struct ip6_frag_state { + u8 *prevhdr; + unsigned int hlen; + unsigned int mtu; + unsigned int left; + int offset; + int ptr; + int hroom; + int troom; + __be32 frag_id; + u8 nexthdr; +}; + +void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu, + unsigned short needed_tailroom, int hdr_room, u8 *prevhdr, + u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state); +struct sk_buff *ip6_frag_next(struct sk_buff *skb, + struct ip6_frag_state *state); + #define IP6_REPLY_MARK(net, mark) \ ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) @@ -258,6 +302,13 @@ struct ipv6_txoptions { /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; +/* flowlabel_reflect sysctl values */ +enum flowlabel_reflect { + FLOWLABEL_REFLECT_ESTABLISHED = 1, + FLOWLABEL_REFLECT_TCP_RESET = 2, + FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES = 4, +}; + struct ip6_flowlabel { struct ip6_flowlabel __rcu *next; __be32 label; @@ -339,7 +390,18 @@ static inline void txopt_put(struct ipv6_txoptions *opt) kfree_rcu(opt, rcu); } -struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); +struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); + +extern struct static_key_false_deferred ipv6_flowlabel_exclusive; +static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, + __be32 label) +{ + if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) + return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT); + + return NULL; +} + struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, struct ipv6_txoptions *fopt); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 1f77fb4dc79d..a21e8b1381a1 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,6 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); + if (fq->q.fqdir->dead) + goto out_rcu_unlock; spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 6c0c4fde16f8..5c93e942c50b 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -45,6 +45,11 @@ struct ipv6_stub { struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); + int (*ip6_del_rt)(struct net *net, struct fib6_info *rt); + void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, + struct nl_info *info); + void (*udpv6_encap_enable)(void); void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 456f2edf78dc..d26da013f7c0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -314,6 +314,7 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_MCAST_RATE: Multicast Rate setting changed for this interface * @BSS_CHANGED_FTM_RESPONDER: fime timing reasurement request responder * functionality changed for this BSS (AP mode). + * @BSS_CHANGED_TWT: TWT status changed * */ enum ieee80211_bss_change { @@ -344,6 +345,7 @@ enum ieee80211_bss_change { BSS_CHANGED_KEEP_ALIVE = 1<<24, BSS_CHANGED_MCAST_RATE = 1<<25, BSS_CHANGED_FTM_RESPONDER = 1<<26, + BSS_CHANGED_TWT = 1<<27, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -501,6 +503,8 @@ struct ieee80211_ftm_responder_params { * @he_support: does this BSS support HE * @twt_requester: does this BSS support TWT requester (relevant for managed * mode only, set if the AP advertises TWT responder role) + * @twt_responder: does this BSS support TWT requester (relevant for managed + * mode only, set if the AP advertises TWT responder role) * @assoc: association status * @ibss_joined: indicates whether this station is part of an IBSS * or not @@ -608,6 +612,7 @@ struct ieee80211_bss_conf { u16 frame_time_rts_th; bool he_support; bool twt_requester; + bool twt_responder; /* association related data */ bool assoc, ibss_joined; bool ibss_creator; @@ -2266,6 +2271,9 @@ struct ieee80211_txq { * @IEEE80211_HW_EXT_KEY_ID_NATIVE: Driver and hardware are supporting Extended * Key ID and can handle two unicast keys per station for Rx and Tx. * + * @IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT: The card/driver can't handle + * active Tx A-MPDU sessions with Extended Key IDs during rekey. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2318,6 +2326,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_MULTI_BSSID, IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID, IEEE80211_HW_EXT_KEY_ID_NATIVE, + IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -5948,29 +5957,6 @@ static inline int rate_supported(struct ieee80211_sta *sta, return (sta == NULL || sta->supp_rates[band] & BIT(index)); } -/** - * rate_control_send_low - helper for drivers for management/no-ack frames - * - * Rate control algorithms that agree to use the lowest rate to - * send management frames and NO_ACK data with the respective hw - * retries should use this in the beginning of their mac80211 get_rate - * callback. If true is returned the rate control can simply return. - * If false is returned we guarantee that sta and sta and priv_sta is - * not null. - * - * Rate control algorithms wishing to do more intelligent selection of - * rate for multicast/broadcast frames may choose to not use this. - * - * @sta: &struct ieee80211_sta pointer to the target destination. Note - * that this may be null. - * @priv_sta: private rate control structure. This may be null. - * @txrc: rate control information we sholud populate for mac80211. - */ -bool rate_control_send_low(struct ieee80211_sta *sta, - void *priv_sta, - struct ieee80211_tx_rate_control *txrc); - - static inline s8 rate_lowest_index(struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index a56bf7fc7c2b..4a9da951a794 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -19,6 +19,7 @@ #include <net/netns/packet.h> #include <net/netns/ipv4.h> #include <net/netns/ipv6.h> +#include <net/netns/nexthop.h> #include <net/netns/ieee802154_6lowpan.h> #include <net/netns/sctp.h> #include <net/netns/dccp.h> @@ -111,6 +112,7 @@ struct net { struct netns_mib mib; struct netns_packet packet; struct netns_unix unx; + struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) struct netns_ipv6 ipv6; @@ -356,8 +358,13 @@ struct pernet_operations { * synchronize_rcu() related to these pernet_operations, * instead of separate synchronize_rcu() for every net. * Please, avoid synchronize_rcu() at all, where it's possible. + * + * Note that a combination of pre_exit() and exit() can + * be used, since a synchronize_rcu() is guaranteed between + * the calls. */ int (*init)(struct net *net); + void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); unsigned int *id; diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index 89808ce293c4..302fcd3aade2 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -42,7 +42,8 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) return port ? &port->br->fake_rtable : NULL; } -struct net_device *setup_pre_routing(struct sk_buff *skb); +struct net_device *setup_pre_routing(struct sk_buff *skb, + const struct net *net); #if IS_ENABLED(CONFIG_IPV6) int br_validate_ipv6(struct net *net, struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index d2bc733a2ef1..c86657d99630 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -49,6 +49,7 @@ union nf_conntrack_expect_proto { struct nf_conntrack_net { unsigned int users4; unsigned int users6; + unsigned int users_bridge; }; #include <linux/types.h> @@ -69,7 +70,8 @@ struct nf_conn { struct nf_conntrack ct_general; spinlock_t lock; - u16 cpu; + /* jiffies32 when this ct is considered dead */ + u32 timeout; #ifdef CONFIG_NF_CONNTRACK_ZONES struct nf_conntrack_zone zone; @@ -81,9 +83,7 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - /* jiffies32 when this ct is considered dead */ - u32 timeout; - + u16 cpu; possible_net_t ct_net; #if IS_ENABLED(CONFIG_NF_NAT) diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h new file mode 100644 index 000000000000..9a5514d5bc51 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_bridge.h @@ -0,0 +1,20 @@ +#ifndef NF_CONNTRACK_BRIDGE_ +#define NF_CONNTRACK_BRIDGE_ + +struct nf_ct_bridge_info { + struct nf_hook_ops *ops; + unsigned int ops_size; + struct module *me; +}; + +void nf_ct_bridge_register(struct nf_ct_bridge_info *info); +void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info); + +struct nf_ct_bridge_frag_data { + char mac[ETH_HLEN]; + bool vlan_present; + u16 vlan_tci; + __be16 vlan_proto; +}; + +#endif diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index ae41e92251dd..de10faf2ce91 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -64,6 +64,9 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) return ret; } +unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo); + void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *proto); diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 2c7559a54092..8f00125b06f4 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -2,6 +2,7 @@ #ifndef _NF_CONNTRACK_SYNPROXY_H #define _NF_CONNTRACK_SYNPROXY_H +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netns/generic.h> struct nf_conn_synproxy { @@ -72,21 +73,12 @@ struct synproxy_options { }; struct tcphdr; -struct xt_synproxy_info; +struct nf_synproxy_info; bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts); -unsigned int synproxy_options_size(const struct synproxy_options *opts); -void synproxy_build_options(struct tcphdr *th, - const struct synproxy_options *opts); -void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info, +void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, struct synproxy_options *opts); -void synproxy_check_timestamp_cookie(struct synproxy_options *opts); - -unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff, - struct tcphdr *th, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct nf_conn_synproxy *synproxy); #endif /* _NF_CONNTRACK_SYNPROXY_H */ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 3e370cb36263..d8c187936bec 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -53,8 +53,6 @@ struct flow_offload_tuple { u8 l4proto; u8 dir; - int oifidx; - u16 mtu; struct dst_entry *dst_cache; diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 7239105d9d2e..3cb6dcf53a4e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -120,6 +120,5 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, } int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - const struct nf_hook_entries *entries, unsigned int index, - unsigned int verdict); + unsigned int index, unsigned int verdict); #endif /* _NF_QUEUE_H */ diff --git a/include/net/netfilter/nf_synproxy.h b/include/net/netfilter/nf_synproxy.h new file mode 100644 index 000000000000..87d73fb5279d --- /dev/null +++ b/include/net/netfilter/nf_synproxy.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NF_SYNPROXY_SHARED_H +#define _NF_SYNPROXY_SHARED_H + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/ip6_checksum.h> +#include <net/ip6_route.h> +#include <net/tcp.h> + +#include <net/netfilter/nf_conntrack_seqadj.h> +#include <net/netfilter/nf_conntrack_synproxy.h> + +void synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, + const struct tcphdr *th, + const struct synproxy_options *opts); + +bool synproxy_recv_client_ack(struct net *net, + const struct sk_buff *skb, + const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq); + +unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *nhs); +int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net); +void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net); + +#if IS_ENABLED(CONFIG_IPV6) +void synproxy_send_client_synack_ipv6(struct net *net, + const struct sk_buff *skb, + const struct tcphdr *th, + const struct synproxy_options *opts); + +bool synproxy_recv_client_ack_ipv6(struct net *net, const struct sk_buff *skb, + const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq); + +unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *nhs); +int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net); +void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net); +#else +static inline int +nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net) { return 0; } +static inline void +nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net) {}; +#endif /* CONFIG_IPV6 */ + +#endif /* _NF_SYNPROXY_SHARED_H */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5b8624ae4a27..35dfdd9f69b3 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -161,6 +161,7 @@ struct nft_ctx { const struct nlattr * const *nla; u32 portid; u32 seq; + u16 flags; u8 family; u8 level; bool report; @@ -636,7 +637,7 @@ static inline struct nft_object **nft_set_ext_obj(const struct nft_set_ext *ext) void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *data, - u64 timeout, gfp_t gfp); + u64 timeout, u64 expiration, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); @@ -735,6 +736,9 @@ enum nft_trans_phase { NFT_TRANS_RELEASE }; +struct nft_flow_rule; +struct nft_offload_ctx; + /** * struct nft_expr_ops - nf_tables expression operations * @@ -777,6 +781,10 @@ struct nft_expr_ops { const struct nft_data **data); bool (*gc)(struct net *net, const struct nft_expr *expr); + int (*offload)(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr); + u32 offload_flags; const struct nft_expr_type *type; void *data; }; @@ -859,6 +867,7 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, + NFT_CHAIN_HW_OFFLOAD = 0x2, }; /** @@ -942,6 +951,7 @@ struct nft_stats { * @stats: per-cpu chain stats * @chain: the chain * @dev_name: device name that this base chain is attached to (if any) + * @cb_list: list of flow block callbacks (for hardware offload) */ struct nft_base_chain { struct nf_hook_ops ops; @@ -951,6 +961,7 @@ struct nft_base_chain { struct nft_stats __percpu *stats; struct nft_chain chain; char dev_name[IFNAMSIZ]; + struct list_head cb_list; }; static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) @@ -1322,11 +1333,14 @@ struct nft_trans { struct nft_trans_rule { struct nft_rule *rule; + struct nft_flow_rule *flow; u32 rule_id; }; #define nft_trans_rule(trans) \ (((struct nft_trans_rule *)trans->data)->rule) +#define nft_trans_flow_rule(trans) \ + (((struct nft_trans_rule *)trans->data)->flow) #define nft_trans_rule_id(trans) \ (((struct nft_trans_rule *)trans->data)->rule_id) diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h new file mode 100644 index 000000000000..3196663a10e3 --- /dev/null +++ b/include/net/netfilter/nf_tables_offload.h @@ -0,0 +1,76 @@ +#ifndef _NET_NF_TABLES_OFFLOAD_H +#define _NET_NF_TABLES_OFFLOAD_H + +#include <net/flow_offload.h> +#include <net/netfilter/nf_tables.h> + +struct nft_offload_reg { + u32 key; + u32 len; + u32 base_offset; + u32 offset; + struct nft_data mask; +}; + +enum nft_offload_dep_type { + NFT_OFFLOAD_DEP_UNSPEC = 0, + NFT_OFFLOAD_DEP_NETWORK, + NFT_OFFLOAD_DEP_TRANSPORT, +}; + +struct nft_offload_ctx { + struct { + enum nft_offload_dep_type type; + __be16 l3num; + u8 protonum; + } dep; + unsigned int num_actions; + struct nft_offload_reg regs[NFT_REG32_15 + 1]; +}; + +void nft_offload_set_dependency(struct nft_offload_ctx *ctx, + enum nft_offload_dep_type type); +void nft_offload_update_dependency(struct nft_offload_ctx *ctx, + const void *data, u32 len); + +struct nft_flow_key { + struct flow_dissector_key_basic basic; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_vlan vlan; + struct flow_dissector_key_eth_addrs eth_addrs; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct nft_flow_match { + struct flow_dissector dissector; + struct nft_flow_key key; + struct nft_flow_key mask; +}; + +struct nft_flow_rule { + __be16 proto; + struct nft_flow_match match; + struct flow_rule *rule; +}; + +#define NFT_OFFLOAD_F_ACTION (1 << 0) + +struct nft_rule; +struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule); +void nft_flow_rule_destroy(struct nft_flow_rule *flow); +int nft_flow_rule_offload_commit(struct net *net); + +#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ + (__reg)->base_offset = \ + offsetof(struct nft_flow_key, __base); \ + (__reg)->offset = \ + offsetof(struct nft_flow_key, __base.__field); \ + (__reg)->len = __len; \ + (__reg)->key = __key; \ + memset(&(__reg)->mask, 0xff, (__reg)->len); + +#endif diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h new file mode 100644 index 000000000000..5c69e9b09388 --- /dev/null +++ b/include/net/netfilter/nft_meta.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NFT_META_H_ +#define _NFT_META_H_ + +struct nft_meta { + enum nft_meta_keys key:8; + union { + enum nft_registers dreg:8; + enum nft_registers sreg:8; + }; +}; + +extern const struct nla_policy nft_meta_policy[]; + +int nft_meta_get_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_meta_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_meta_get_dump(struct sk_buff *skb, + const struct nft_expr *expr); + +int nft_meta_set_dump(struct sk_buff *skb, + const struct nft_expr *expr); + +void nft_meta_get_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_meta_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt); + +void nft_meta_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr); + +int nft_meta_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data); + +#endif diff --git a/include/net/netlink.h b/include/net/netlink.h index 395b4406f4b0..e4650e5b64a1 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -378,13 +378,17 @@ struct nla_policy { /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request + * @nl_net: Network namespace * @portid: Netlink PORTID of requesting application + * @skip_notify: Skip netlink notifications to user space + * @skip_notify_kernel: Skip selected in-kernel notifications */ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; u32 portid; - bool skip_notify; + u8 skip_notify:1, + skip_notify_kernel:1; }; /** @@ -1755,6 +1759,15 @@ static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, } static inline int +nl80211_validate_nested(const struct nlattr *start, int maxtype, + const struct nla_policy *policy, + struct netlink_ext_ack *extack) +{ + return __nla_validate_nested(start, maxtype, policy, + NL_VALIDATE_STRICT, extack); +} + +static inline int nla_validate_nested_deprecated(const struct nlattr *start, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h index 736aeac52f56..95406e1342cb 100644 --- a/include/net/netns/ieee802154_6lowpan.h +++ b/include/net/netns/ieee802154_6lowpan.h @@ -16,7 +16,7 @@ struct netns_sysctl_lowpan { struct netns_ieee802154_lowpan { struct netns_sysctl_lowpan sysctl; - struct netns_frags frags; + struct fqdir *fqdir; }; #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 623cfbb7b8dc..bc24a8ec1ce5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -72,7 +72,7 @@ struct netns_ipv4 { struct inet_peer_base *peers; struct sock * __percpu *tcp_sk; - struct netns_frags frags; + struct fqdir *fqdir; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; struct xt_table *iptable_mangle; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5e61b5a8635d..022a0fd1a5a4 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -58,7 +58,7 @@ struct netns_ipv6 { struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; struct inet_peer_base *peers; - struct netns_frags frags; + struct fqdir *fqdir; #ifdef CONFIG_NETFILTER struct xt_table *ip6table_filter; struct xt_table *ip6table_mangle; @@ -116,7 +116,7 @@ struct netns_ipv6 { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag { - struct netns_frags frags; + struct fqdir *fqdir; }; #endif diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h new file mode 100644 index 000000000000..c712ee5eebd9 --- /dev/null +++ b/include/net/netns/nexthop.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * nexthops in net namespaces + */ + +#ifndef __NETNS_NEXTHOP_H__ +#define __NETNS_NEXTHOP_H__ + +#include <linux/rbtree.h> + +struct netns_nexthop { + struct rb_root rb_root; /* tree of nexthops by id */ + struct hlist_head *devhash; /* nexthops by device */ + + unsigned int seq; /* protected by rtnl_mutex */ + u32 last_id_allocated; +}; +#endif diff --git a/include/net/nexthop.h b/include/net/nexthop.h new file mode 100644 index 000000000000..25f1f9a8419b --- /dev/null +++ b/include/net/nexthop.h @@ -0,0 +1,312 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generic nexthop implementation + * + * Copyright (c) 2017-19 Cumulus Networks + * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com> + */ + +#ifndef __LINUX_NEXTHOP_H +#define __LINUX_NEXTHOP_H + +#include <linux/netdevice.h> +#include <linux/route.h> +#include <linux/types.h> +#include <net/ip_fib.h> +#include <net/ip6_fib.h> +#include <net/netlink.h> + +#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK + +struct nexthop; + +struct nh_config { + u32 nh_id; + + u8 nh_family; + u8 nh_protocol; + u8 nh_blackhole; + u32 nh_flags; + + int nh_ifindex; + struct net_device *dev; + + union { + __be32 ipv4; + struct in6_addr ipv6; + } gw; + + struct nlattr *nh_grp; + u16 nh_grp_type; + + struct nlattr *nh_encap; + u16 nh_encap_type; + + u32 nlflags; + struct nl_info nlinfo; +}; + +struct nh_info { + struct hlist_node dev_hash; /* entry on netns devhash */ + struct nexthop *nh_parent; + + u8 family; + bool reject_nh; + + union { + struct fib_nh_common fib_nhc; + struct fib_nh fib_nh; + struct fib6_nh fib6_nh; + }; +}; + +struct nh_grp_entry { + struct nexthop *nh; + u8 weight; + atomic_t upper_bound; + + struct list_head nh_list; + struct nexthop *nh_parent; /* nexthop of group with this entry */ +}; + +struct nh_group { + u16 num_nh; + bool mpath; + bool has_v4; + struct nh_grp_entry nh_entries[0]; +}; + +struct nexthop { + struct rb_node rb_node; /* entry on netns rbtree */ + struct list_head fi_list; /* v4 entries using nh */ + struct list_head f6i_list; /* v6 entries using nh */ + struct list_head grp_list; /* nh group entries using this nh */ + struct net *net; + + u32 id; + + u8 protocol; /* app managing this nh */ + u8 nh_flags; + bool is_group; + + refcount_t refcnt; + struct rcu_head rcu; + + union { + struct nh_info __rcu *nh_info; + struct nh_group __rcu *nh_grp; + }; +}; + +/* caller is holding rcu or rtnl; no reference taken to nexthop */ +struct nexthop *nexthop_find_by_id(struct net *net, u32 id); +void nexthop_free_rcu(struct rcu_head *head); + +static inline bool nexthop_get(struct nexthop *nh) +{ + return refcount_inc_not_zero(&nh->refcnt); +} + +static inline void nexthop_put(struct nexthop *nh) +{ + if (refcount_dec_and_test(&nh->refcnt)) + call_rcu(&nh->rcu, nexthop_free_rcu); +} + +static inline bool nexthop_cmp(const struct nexthop *nh1, + const struct nexthop *nh2) +{ + return nh1 == nh2; +} + +static inline bool nexthop_is_multipath(const struct nexthop *nh) +{ + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + return nh_grp->mpath; + } + return false; +} + +struct nexthop *nexthop_select_path(struct nexthop *nh, int hash); + +static inline unsigned int nexthop_num_path(const struct nexthop *nh) +{ + unsigned int rc = 1; + + if (nexthop_is_multipath(nh)) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_rtnl(nh->nh_grp); + rc = nh_grp->num_nh; + } else { + const struct nh_info *nhi; + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->reject_nh) + rc = 0; + } + + return rc; +} + +static inline +struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel) +{ + const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); + + /* for_nexthops macros in fib_semantics.c grabs a pointer to + * the nexthop before checking nhsel + */ + if (nhsel >= nhg->num_nh) + return NULL; + + return nhg->nh_entries[nhsel].nh; +} + +static inline +int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + int i; + + for (i = 0; i < nhg->num_nh; i++) { + struct nexthop *nhe = nhg->nh_entries[i].nh; + struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info); + struct fib_nh_common *nhc = &nhi->fib_nhc; + int weight = nhg->nh_entries[i].weight; + + if (fib_add_nexthop(skb, nhc, weight) < 0) + return -EMSGSIZE; + } + + return 0; +} + +/* called with rcu lock */ +static inline bool nexthop_is_blackhole(const struct nexthop *nh) +{ + const struct nh_info *nhi; + + if (nexthop_is_multipath(nh)) { + if (nexthop_num_path(nh) > 1) + return false; + nh = nexthop_mpath_select(nh, 0); + if (!nh) + return false; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + return nhi->reject_nh; +} + +static inline void nexthop_path_fib_result(struct fib_result *res, int hash) +{ + struct nh_info *nhi; + struct nexthop *nh; + + nh = nexthop_select_path(res->fi->nh, hash); + nhi = rcu_dereference(nh->nh_info); + res->nhc = &nhi->fib_nhc; +} + +/* called with rcu read lock or rtnl held */ +static inline +struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel) +{ + struct nh_info *nhi; + + BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0); + BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0); + + if (nexthop_is_multipath(nh)) { + nh = nexthop_mpath_select(nh, nhsel); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + return &nhi->fib_nhc; +} + +static inline unsigned int fib_info_num_path(const struct fib_info *fi) +{ + if (unlikely(fi->nh)) + return nexthop_num_path(fi->nh); + + return fi->fib_nhs; +} + +int fib_check_nexthop(struct nexthop *nh, u8 scope, + struct netlink_ext_ack *extack); + +static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) +{ + if (unlikely(fi->nh)) + return nexthop_fib_nhc(fi->nh, nhsel); + + return &fi->fib_nh[nhsel].nh_common; +} + +/* only used when fib_nh is built into fib_info */ +static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) +{ + WARN_ON(fi->nh); + + return &fi->fib_nh[nhsel]; +} + +/* + * IPv6 variants + */ +int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, + struct netlink_ext_ack *extack); + +static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) +{ + struct nh_info *nhi; + + if (nexthop_is_multipath(nh)) { + nh = nexthop_mpath_select(nh, 0); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->family == AF_INET6) + return &nhi->fib6_nh; + + return NULL; +} + +static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) +{ + struct fib6_nh *fib6_nh; + + fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh; + return fib6_nh->fib_nh_dev; +} + +static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) +{ + struct nexthop *nh = res->f6i->nh; + struct nh_info *nhi; + + nh = nexthop_select_path(nh, hash); + + nhi = rcu_dereference_rtnl(nh->nh_info); + if (nhi->reject_nh) { + res->fib6_type = RTN_BLACKHOLE; + res->fib6_flags |= RTF_REJECT; + res->nh = nexthop_fib6_nh(nh); + } else { + res->nh = &nhi->fib6_nh; + } +} + +int nexthop_for_each_fib6_nh(struct nexthop *nh, + int (*cb)(struct fib6_nh *nh, void *arg), + void *arg); +#endif diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 694d055e01ef..2cbcdbdec254 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -16,14 +16,16 @@ * page_pool_alloc_pages() call. Drivers should likely use * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). * - * If page_pool handles DMA mapping (use page->private), then API user - * is responsible for invoking page_pool_put_page() once. In-case of - * elevated refcnt, the DMA state is released, assuming other users of - * the page will eventually call put_page(). + * API keeps track of in-flight pages, in-order to let API user know + * when it is safe to dealloactor page_pool object. Thus, API users + * must make sure to call page_pool_release_page() when a page is + * "leaving" the page_pool. Or call page_pool_put_page() where + * appropiate. For maintaining correct accounting. * - * If no DMA mapping is done, then it can act as shim-layer that - * fall-through to alloc_page. As no state is kept on the page, the - * regular put_page() call is sufficient. + * API user must only call page_pool_put_page() once on a page, as it + * will either recycle the page, or in case of elevated refcnt, it + * will release the DMA mapping and in-flight state accounting. We + * hope to lift this requirement in the future. */ #ifndef _NET_PAGE_POOL_H #define _NET_PAGE_POOL_H @@ -66,9 +68,10 @@ struct page_pool_params { }; struct page_pool { - struct rcu_head rcu; struct page_pool_params p; + u32 pages_state_hold_cnt; + /* * Data structure for allocation side * @@ -96,6 +99,14 @@ struct page_pool { * TODO: Implement bulk return pages into this structure. */ struct ptr_ring ring; + + atomic_t pages_state_release_cnt; + + /* A page_pool is strictly tied to a single RX-queue being + * protected by NAPI, due to above pp_alloc_cache. This + * refcnt serves purpose is to simplify drivers error handling. + */ + refcount_t user_cnt; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); @@ -107,9 +118,36 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } +/* get the stored dma direction. A driver might decide to treat this locally and + * avoid the extra cache line from page_pool to determine the direction + */ +static +inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) +{ + return pool->p.dma_dir; +} + struct page_pool *page_pool_create(const struct page_pool_params *params); -void page_pool_destroy(struct page_pool *pool); +void __page_pool_free(struct page_pool *pool); +static inline void page_pool_free(struct page_pool *pool) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + __page_pool_free(pool); +#endif +} + +/* Drivers use this instead of page_pool_free */ +static inline void page_pool_destroy(struct page_pool *pool) +{ + if (!pool) + return; + + page_pool_free(pool); +} /* Never call this directly, use helpers below */ void __page_pool_put_page(struct page_pool *pool, @@ -132,6 +170,43 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, __page_pool_put_page(pool, page, true); } +/* API user MUST have disconnected alloc-side (not allowed to call + * page_pool_alloc_pages()) before calling this. The free-side can + * still run concurrently, to handle in-flight packet-pages. + * + * A request to shutdown can fail (with false) if there are still + * in-flight packet-pages. + */ +bool __page_pool_request_shutdown(struct page_pool *pool); +static inline bool page_pool_request_shutdown(struct page_pool *pool) +{ + bool safe_to_remove = false; + +#ifdef CONFIG_PAGE_POOL + safe_to_remove = __page_pool_request_shutdown(pool); +#endif + return safe_to_remove; +} + +/* Disconnects a page (from a page_pool). API users can have a need + * to disconnect a page (from a page_pool), to allow it to be used as + * a regular page (that will eventually be returned to the normal + * page-allocator via put_page). + */ +void page_pool_unmap_page(struct page_pool *pool, struct page *page); +static inline void page_pool_release_page(struct page_pool *pool, + struct page *page) +{ +#ifdef CONFIG_PAGE_POOL + page_pool_unmap_page(pool, page); +#endif +} + +static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +{ + return page->dma_addr; +} + static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL @@ -141,4 +216,14 @@ static inline bool is_page_pool_compiled_in(void) #endif } +static inline void page_pool_get(struct page_pool *pool) +{ + refcount_inc(&pool->user_cnt); +} + +static inline bool page_pool_put(struct page_pool *pool) +{ + return refcount_dec_and_test(&pool->user_cnt); +} + #endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 514e3c80ecc1..b03d466182db 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -7,9 +7,10 @@ #include <net/sch_generic.h> #include <net/act_api.h> #include <net/flow_offload.h> +#include <net/net_namespace.h> /* TC action not accessible from user space */ -#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1) +#define TC_ACT_CONSUMED (TC_ACT_VALUE_MAX + 1) /* Basic packet classifier frontend definitions. */ @@ -25,14 +26,8 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); -enum tcf_block_binder_type { - TCF_BLOCK_BINDER_TYPE_UNSPEC, - TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS, - TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS, -}; - struct tcf_block_ext_info { - enum tcf_block_binder_type binder_type; + enum flow_block_binder_type binder_type; tcf_chain_head_change_t *chain_head_change; void *chain_head_change_priv; u32 block_index; @@ -71,22 +66,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block) return block->q; } -void *tcf_block_cb_priv(struct tcf_block_cb *block_cb); -struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident); -void tcf_block_cb_incref(struct tcf_block_cb *block_cb); -unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb); -struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident, - void *cb_priv, - struct netlink_ext_ack *extack); -int tcf_block_cb_register(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident, - void *cb_priv, struct netlink_ext_ack *extack); -void __tcf_block_cb_unregister(struct tcf_block *block, - struct tcf_block_cb *block_cb); -void tcf_block_cb_unregister(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident); int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, tc_indr_block_bind_cb_t *cb, void *cb_ident); int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, @@ -150,59 +129,6 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb, } static inline -void *tcf_block_cb_priv(struct tcf_block_cb *block_cb) -{ - return NULL; -} - -static inline -struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident) -{ - return NULL; -} - -static inline -void tcf_block_cb_incref(struct tcf_block_cb *block_cb) -{ -} - -static inline -unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) -{ - return 0; -} - -static inline -struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident, - void *cb_priv, - struct netlink_ext_ack *extack) -{ - return NULL; -} - -static inline -int tcf_block_cb_register(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident, - void *cb_priv, struct netlink_ext_ack *extack) -{ - return 0; -} - -static inline -void __tcf_block_cb_unregister(struct tcf_block *block, - struct tcf_block_cb *block_cb) -{ -} - -static inline -void tcf_block_cb_unregister(struct tcf_block *block, - tc_setup_cb_t *cb, void *cb_ident) -{ -} - -static inline int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, tc_indr_block_bind_cb_t *cb, void *cb_ident) { @@ -576,9 +502,6 @@ static inline int tcf_valid_offset(const struct sk_buff *skb, (ptr <= (ptr + len))); } -#ifdef CONFIG_NET_CLS_IND -#include <net/net_namespace.h> - static inline int tcf_change_indev(struct net *net, struct nlattr *indev_tlv, struct netlink_ext_ack *extack) @@ -605,7 +528,6 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) return false; return ifindex == skb->skb_iif; } -#endif /* CONFIG_NET_CLS_IND */ int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts); @@ -613,25 +535,6 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop); unsigned int tcf_exts_num_actions(struct tcf_exts *exts); -enum tc_block_command { - TC_BLOCK_BIND, - TC_BLOCK_UNBIND, -}; - -struct tc_block_offload { - enum tc_block_command command; - enum tcf_block_binder_type binder_type; - struct tcf_block *block; - struct netlink_ext_ack *extack; -}; - -struct tc_cls_common_offload { - u32 chain_index; - __be16 protocol; - u32 prio; - struct netlink_ext_ack *extack; -}; - struct tc_cls_u32_knode { struct tcf_exts *exts; struct tcf_result *res; @@ -659,7 +562,7 @@ enum tc_clsu32_command { }; struct tc_cls_u32_offload { - struct tc_cls_common_offload common; + struct flow_cls_common_offload common; /* knode values */ enum tc_clsu32_command command; union { @@ -686,7 +589,7 @@ static inline bool tc_can_offload_extack(const struct net_device *dev, static inline bool tc_cls_can_offload_and_chain0(const struct net_device *dev, - struct tc_cls_common_offload *common) + struct flow_cls_common_offload *common) { if (!tc_can_offload_extack(dev, common->extack)) return false; @@ -728,7 +631,7 @@ static inline bool tc_in_hw(u32 flags) } static inline void -tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, +tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, const struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { @@ -739,29 +642,6 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->extack = extack; } -enum tc_fl_command { - TC_CLSFLOWER_REPLACE, - TC_CLSFLOWER_DESTROY, - TC_CLSFLOWER_STATS, - TC_CLSFLOWER_TMPLT_CREATE, - TC_CLSFLOWER_TMPLT_DESTROY, -}; - -struct tc_cls_flower_offload { - struct tc_cls_common_offload common; - enum tc_fl_command command; - unsigned long cookie; - struct flow_rule *rule; - struct flow_stats stats; - u32 classid; -}; - -static inline struct flow_rule * -tc_cls_flower_offload_flow_rule(struct tc_cls_flower_offload *tc_flow_cmd) -{ - return tc_flow_cmd->rule; -} - enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, @@ -769,7 +649,7 @@ enum tc_matchall_command { }; struct tc_cls_matchall_offload { - struct tc_cls_common_offload common; + struct flow_cls_common_offload common; enum tc_matchall_command command; struct flow_rule *rule; struct flow_stats stats; @@ -782,7 +662,7 @@ enum tc_clsbpf_command { }; struct tc_cls_bpf_offload { - struct tc_cls_common_offload common; + struct flow_cls_common_offload common; enum tc_clsbpf_command command; struct tcf_exts *exts; struct bpf_prog *prog; diff --git a/include/net/route.h b/include/net/route.h index 55ff71ffb796..630a0493f1f3 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -231,6 +231,10 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric); void rt_add_uncached_list(struct rtable *rt); void rt_del_uncached_list(struct rtable *rt); +int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, + u32 table_id, struct fib_info *fi, + int *fa_index, int fa_start); + static inline void ip_rt_put(struct rtable *rt) { /* dst_release() accepts a NULL parameter. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 21f434f3ac9e..855167bbc372 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -279,7 +279,7 @@ struct tcf_result { }; const struct tcf_proto *goto_tp; - /* used by the TC_ACT_REINSERT action */ + /* used in the skb_tc_reinsert function */ struct { bool ingress; struct gnet_stats_queue *qstats; diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 314699333bec..5a9bb09f32b6 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -43,19 +43,21 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, (__force __u32)csum2, len); } +static const struct skb_checksum_ops sctp_csum_ops = { + .update = sctp_csum_update, + .combine = sctp_csum_combine, +}; + static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); - const struct skb_checksum_ops ops = { - .update = sctp_csum_update, - .combine = sctp_csum_combine, - }; __le32 old = sh->checksum; __wsum new; sh->checksum = 0; - new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops); + new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, + &sctp_csum_ops); sh->checksum = old; return cpu_to_le32((__force __u32)new); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0767701ef362..ba5c4f6eede5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -219,7 +219,6 @@ struct sctp_sock { disable_fragments:1, v4mapped:1, frag_interleave:1, - strm_interleave:1, recvrcvinfo:1, recvnxtinfo:1, data_ready_signalled:1; @@ -1324,6 +1323,7 @@ struct sctp_endpoint { struct list_head endpoint_shared_keys; __u16 active_key_id; __u8 auth_enable:1, + intl_enable:1, prsctp_enable:1, reconf_enable:1; @@ -1679,28 +1679,30 @@ struct sctp_association { __be16 addip_disabled_mask; /* These are capabilities which our peer advertised. */ - __u8 ecn_capable:1, /* Can peer do ECN? */ + __u16 ecn_capable:1, /* Can peer do ECN? */ ipv4_address:1, /* Peer understands IPv4 addresses? */ ipv6_address:1, /* Peer understands IPv6 addresses? */ hostname_address:1, /* Peer understands DNS addresses? */ asconf_capable:1, /* Does peer support ADDIP? */ prsctp_capable:1, /* Can peer do PR-SCTP? */ reconf_capable:1, /* Can peer do RE-CONFIG? */ - auth_capable:1; /* Is peer doing SCTP-AUTH? */ - - /* sack_needed : This flag indicates if the next received - * : packet is to be responded to with a - * : SACK. This is initialized to 0. When a packet - * : is received sack_cnt is incremented. If this value - * : reaches 2 or more, a SACK is sent and the - * : value is reset to 0. Note: This is used only - * : when no DATA chunks are received out of - * : order. When DATA chunks are out of order, - * : SACK's are not delayed (see Section 6). - */ - __u8 sack_needed:1, /* Do we need to sack the peer? */ + intl_capable:1, /* Can peer do INTERLEAVE */ + auth_capable:1, /* Is peer doing SCTP-AUTH? */ + /* sack_needed: + * This flag indicates if the next received + * packet is to be responded to with a + * SACK. This is initialized to 0. When a packet + * is received sack_cnt is incremented. If this value + * reaches 2 or more, a SACK is sent and the + * value is reset to 0. Note: This is used only + * when no DATA chunks are received out of + * order. When DATA chunks are out of order, + * SACK's are not delayed (see Section 6). + */ + sack_needed:1, /* Do we need to sack the peer? */ sack_generation:1, zero_window_announced:1; + __u32 sack_cnt; __u32 adaptation_ind; /* Adaptation Code point. */ @@ -2049,10 +2051,7 @@ struct sctp_association { __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ temp:1, /* Is it a temporary association? */ - force_delay:1, - intl_enable:1, - prsctp_enable:1, - reconf_enable:1; + force_delay:1; __u8 strreset_enable; __u8 strreset_outstanding; /* request param count on the fly */ diff --git a/include/net/sock.h b/include/net/sock.h index 6cbc16136357..228db3998e46 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1822,7 +1822,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) { WARN_ON(parent->sk); write_lock_bh(&sk->sk_callback_lock); - rcu_assign_pointer(sk->sk_wq, parent->wq); + rcu_assign_pointer(sk->sk_wq, &parent->wq); parent->sk = sk; sk_set_socket(sk, parent); sk->sk_uid = SOCK_INODE(parent)->i_uid; @@ -2100,7 +2100,7 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock, poll_table *p) { if (!poll_does_not_wait(p)) { - poll_wait(filp, &sock->wq->wait, p); + poll_wait(filp, &sock->wq.wait, p); /* We need to be sure we are in sync with the * socket flags modification. * diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index 8a5f70c7cdf2..d9112de85261 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -35,6 +35,8 @@ extern struct sock *reuseport_select_sock(struct sock *sk, struct sk_buff *skb, int hdr_len); extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); +extern int reuseport_detach_prog(struct sock *sk); + int reuseport_get_id(struct sock_reuseport *reuse); #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h new file mode 100644 index 000000000000..bdc20ab3b88d --- /dev/null +++ b/include/net/tc_act/tc_ct.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_TC_CT_H +#define __NET_TC_CT_H + +#include <net/act_api.h> +#include <uapi/linux/tc_act/tc_ct.h> + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_conntrack_labels.h> + +struct tcf_ct_params { + struct nf_conn *tmpl; + u16 zone; + + u32 mark; + u32 mark_mask; + + u32 labels[NF_CT_LABELS_MAX_SIZE / sizeof(u32)]; + u32 labels_mask[NF_CT_LABELS_MAX_SIZE / sizeof(u32)]; + + struct nf_nat_range2 range; + bool ipv4_range; + + u16 ct_action; + + struct rcu_head rcu; +}; + +struct tcf_ct { + struct tc_action common; + struct tcf_ct_params __rcu *params; +}; + +#define to_ct(a) ((struct tcf_ct *)a) +#define to_ct_params(a) ((struct tcf_ct_params *) \ + rtnl_dereference((to_ct(a)->params))) + +static inline uint16_t tcf_ct_zone(const struct tc_action *a) +{ + return to_ct_params(a)->zone; +} + +static inline int tcf_ct_action(const struct tc_action *a) +{ + return to_ct_params(a)->ct_action; +} + +#else +static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; } +static inline int tcf_ct_action(const struct tc_action *a) { return 0; } +#endif /* CONFIG_NF_CONNTRACK */ + +static inline bool is_tcf_ct(const struct tc_action *a) +{ +#if defined(CONFIG_NET_CLS_ACT) && IS_ENABLED(CONFIG_NF_CONNTRACK) + if (a->ops && a->ops->id == TCA_ID_CT) + return true; +#endif + return false; +} + +#endif /* __NET_TC_CT_H */ diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h new file mode 100644 index 000000000000..f071c1d70a25 --- /dev/null +++ b/include/net/tc_act/tc_ctinfo.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_TC_CTINFO_H +#define __NET_TC_CTINFO_H + +#include <net/act_api.h> + +struct tcf_ctinfo_params { + struct rcu_head rcu; + struct net *net; + u32 dscpmask; + u32 dscpstatemask; + u32 cpmarkmask; + u16 zone; + u8 mode; + u8 dscpmaskshift; +}; + +struct tcf_ctinfo { + struct tc_action common; + struct tcf_ctinfo_params __rcu *params; + u64 stats_dscp_set; + u64 stats_dscp_error; + u64 stats_cpmark_set; +}; + +enum { + CTINFO_MODE_DSCP = BIT(0), + CTINFO_MODE_CPMARK = BIT(1) +}; + +#define to_ctinfo(a) ((struct tcf_ctinfo *)a) + +#endif /* __NET_TC_CTINFO_H */ diff --git a/include/net/tc_act/tc_mpls.h b/include/net/tc_act/tc_mpls.h new file mode 100644 index 000000000000..4bc3d9250ef0 --- /dev/null +++ b/include/net/tc_act/tc_mpls.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef __NET_TC_MPLS_H +#define __NET_TC_MPLS_H + +#include <linux/tc_act/tc_mpls.h> +#include <net/act_api.h> + +struct tcf_mpls_params { + int tcfm_action; + u32 tcfm_label; + u8 tcfm_tc; + u8 tcfm_ttl; + u8 tcfm_bos; + __be16 tcfm_proto; + struct rcu_head rcu; +}; + +#define ACT_MPLS_TC_NOT_SET 0xff +#define ACT_MPLS_BOS_NOT_SET 0xff +#define ACT_MPLS_LABEL_NOT_SET 0xffffffff + +struct tcf_mpls { + struct tc_action common; + struct tcf_mpls_params __rcu *mpls_p; +}; +#define to_mpls(a) ((struct tcf_mpls *)a) + +#endif /* __NET_TC_MPLS_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 582c0caa9811..cca3c59b98bf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -43,6 +43,7 @@ #include <linux/seq_file.h> #include <linux/memcontrol.h> #include <linux/bpf-cgroup.h> +#include <linux/siphash.h> extern struct inet_hashinfo tcp_hashinfo; @@ -1612,7 +1613,7 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, - void *key, unsigned int len); + void *primary_key, void *backup_key); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -1622,13 +1623,16 @@ void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); -#define TCP_FASTOPEN_KEY_LENGTH 16 +#define TCP_FASTOPEN_KEY_LENGTH sizeof(siphash_key_t) +#define TCP_FASTOPEN_KEY_MAX 2 +#define TCP_FASTOPEN_KEY_BUF_LENGTH \ + (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX) /* Fastopen key context */ struct tcp_fastopen_context { - struct crypto_cipher *tfm; - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; - struct rcu_head rcu; + siphash_key_t key[TCP_FASTOPEN_KEY_MAX]; + int num; + struct rcu_head rcu; }; extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; @@ -1637,6 +1641,35 @@ bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); +/* Caller needs to wrap with rcu_read_(un)lock() */ +static inline +struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk) +{ + struct tcp_fastopen_context *ctx; + + ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); + if (!ctx) + ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); + return ctx; +} + +static inline +bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc, + const struct tcp_fastopen_cookie *orig) +{ + if (orig->len == TCP_FASTOPEN_COOKIE_SIZE && + orig->len == foc->len && + !memcmp(orig->val, foc->val, foc->len)) + return true; + return false; +} + +static inline +int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx) +{ + return ctx->num; +} + /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. */ @@ -2188,6 +2221,12 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } +static inline void tcp_bpf_rtt(struct sock *sk) +{ + if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG)) + tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); +} + #if IS_ENABLED(CONFIG_SMC) extern struct static_key_false tcp_have_smc; #endif @@ -2199,4 +2238,26 @@ void clean_acked_data_disable(struct inet_connection_sock *icsk); void clean_acked_data_flush(void); #endif +DECLARE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); +static inline void tcp_add_tx_delay(struct sk_buff *skb, + const struct tcp_sock *tp) +{ + if (static_branch_unlikely(&tcp_tx_delay_enabled)) + skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC; +} + +/* Compute Earliest Departure Time for some control packets + * like ACK or RST for TIME_WAIT or non ESTABLISHED sockets. + */ +static inline u64 tcp_transmit_time(const struct sock *sk) +{ + if (static_branch_unlikely(&tcp_tx_delay_enabled)) { + u32 delay = (sk->sk_state == TCP_TIME_WAIT) ? + tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay; + + return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC; + } + return 0; +} + #endif /* _TCP_H */ diff --git a/include/net/tls.h b/include/net/tls.h index 53d96bca220d..584609174fe0 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -40,6 +40,7 @@ #include <linux/socket.h> #include <linux/tcp.h> #include <linux/skmsg.h> +#include <linux/netdevice.h> #include <net/tcp.h> #include <net/strparser.h> @@ -61,6 +62,7 @@ #define TLS_DEVICE_NAME_MAX 32 #define MAX_IV_SIZE 16 +#define TLS_MAX_REC_SEQ_SIZE 8 /* For AES-CCM, the full 16-bytes of IV is made of '4' fields of given sizes. * @@ -197,20 +199,24 @@ struct tls_offload_context_tx { struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; void (*sk_destruct)(struct sock *sk); - u8 driver_state[]; + u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ -#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *))) +#define TLS_DRIVER_STATE_SIZE_TX 16 }; #define TLS_OFFLOAD_CONTEXT_SIZE_TX \ - (ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) + \ - TLS_DRIVER_STATE_SIZE) + (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) enum tls_context_flags { TLS_RX_SYNC_RUNNING = 0, + /* Unlike RX where resync is driven entirely by the core in TX only + * the driver knows when things went out of sync, so we need the flag + * to be atomic. + */ + TLS_TX_SYNC_SCHED = 1, }; struct cipher_context { @@ -240,34 +246,32 @@ struct tls_prot_info { }; struct tls_context { + /* read-only cache line */ struct tls_prot_info prot_info; - union tls_crypto_context crypto_send; - union tls_crypto_context crypto_recv; + u8 tx_conf:3; + u8 rx_conf:3; - struct list_head list; - struct net_device *netdev; - refcount_t refcount; + int (*push_pending_record)(struct sock *sk, int flags); + void (*sk_write_space)(struct sock *sk); void *priv_ctx_tx; void *priv_ctx_rx; - u8 tx_conf:3; - u8 rx_conf:3; + struct net_device *netdev; + /* rw cache line */ struct cipher_context tx; struct cipher_context rx; struct scatterlist *partially_sent_record; u16 partially_sent_offset; - unsigned long flags; bool in_tcp_sendpages; bool pending_open_record_frags; + unsigned long flags; - int (*push_pending_record)(struct sock *sk, int flags); - - void (*sk_write_space)(struct sock *sk); + /* cache cold stuff */ void (*sk_destruct)(struct sock *sk); void (*sk_proto_close)(struct sock *sk, long timeout); @@ -279,6 +283,12 @@ struct tls_context { int __user *optlen); int (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + + union tls_crypto_context crypto_send; + union tls_crypto_context crypto_recv; + + struct list_head list; + refcount_t refcount; }; enum tls_offload_ctx_dir { @@ -294,25 +304,50 @@ struct tlsdev_ops { void (*tls_dev_del)(struct net_device *netdev, struct tls_context *ctx, enum tls_offload_ctx_dir direction); - void (*tls_dev_resync_rx)(struct net_device *netdev, - struct sock *sk, u32 seq, u64 rcd_sn); + int (*tls_dev_resync)(struct net_device *netdev, + struct sock *sk, u32 seq, u8 *rcd_sn, + enum tls_offload_ctx_dir direction); }; +enum tls_offload_sync_type { + TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ = 0, + TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT = 1, +}; + +#define TLS_DEVICE_RESYNC_NH_START_IVAL 2 +#define TLS_DEVICE_RESYNC_NH_MAX_IVAL 128 + struct tls_offload_context_rx { /* sw must be the first member of tls_offload_context_rx */ struct tls_sw_context_rx sw; - atomic64_t resync_req; - u8 driver_state[]; + enum tls_offload_sync_type resync_type; + /* this member is set regardless of resync_type, to avoid branches */ + u8 resync_nh_reset:1; + /* CORE_NEXT_HINT-only member, but use the hole here */ + u8 resync_nh_do_now:1; + union { + /* TLS_OFFLOAD_SYNC_TYPE_DRIVER_REQ */ + struct { + atomic64_t resync_req; + }; + /* TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT */ + struct { + u32 decrypted_failed; + u32 decrypted_tgt; + } resync_nh; + }; + u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough * driver specific state to justify another layer of indirection */ +#define TLS_DRIVER_STATE_SIZE_RX 8 }; #define TLS_OFFLOAD_CONTEXT_SIZE_RX \ - (ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \ - TLS_DRIVER_STATE_SIZE) + (sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX) +void tls_ctx_free(struct tls_context *ctx); int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); @@ -431,19 +466,15 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk) } static inline void tls_advance_record_sn(struct sock *sk, - struct cipher_context *ctx, - int version) + struct tls_prot_info *prot, + struct cipher_context *ctx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) tls_err_abort(sk, EBADMSG); - if (version != TLS_1_3_VERSION) { + if (prot->version != TLS_1_3_VERSION) tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, prot->iv_size); - } } static inline void tls_fill_prepend(struct tls_context *ctx, @@ -545,6 +576,23 @@ tls_offload_ctx_rx(const struct tls_context *tls_ctx) return (struct tls_offload_context_rx *)tls_ctx->priv_ctx_rx; } +#if IS_ENABLED(CONFIG_TLS_DEVICE) +static inline void *__tls_driver_ctx(struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + return tls_offload_ctx_tx(tls_ctx)->driver_state; + else + return tls_offload_ctx_rx(tls_ctx)->driver_state; +} + +static inline void * +tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction) +{ + return __tls_driver_ctx(tls_get_ctx(sk), direction); +} +#endif + /* The TLS context is valid until sk_destruct is called */ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) { @@ -554,6 +602,31 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1); } +static inline void +tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + tls_offload_ctx_rx(tls_ctx)->resync_type = type; +} + +static inline void tls_offload_tx_resync_request(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + + WARN_ON(test_and_set_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags)); +} + +/* Driver's seq tracking has to be disabled until resync succeeded */ +static inline bool tls_offload_tx_resync_pending(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + bool ret; + + ret = test_bit(TLS_TX_SYNC_SCHED, &tls_ctx->flags); + smp_mb__after_atomic(); + return ret; +} int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, unsigned char *record_type); @@ -562,6 +635,7 @@ void tls_unregister_device(struct tls_device *device); int tls_device_decrypted(struct sock *sk, struct sk_buff *skb); int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgout); +struct sk_buff *tls_encrypt_skb(struct sk_buff *skb); struct sk_buff *tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, @@ -574,6 +648,6 @@ int tls_sw_fallback_init(struct sock *sk, int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); void tls_device_offload_cleanup_rx(struct sock *sk); -void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn); +void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq); #endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 83b5999a2587..dc1583a1fb8a 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -242,7 +242,7 @@ struct vxlan_dev { struct vxlan_rdst default_dst; /* default destination */ struct timer_list age_timer; - spinlock_t hash_lock; + spinlock_t hash_lock[FDB_HASH_SIZE]; unsigned int addrcnt; struct gro_cells gro_cells; diff --git a/include/net/xdp.h b/include/net/xdp.h index 8e0deddef35c..40c6d3398458 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -129,6 +129,21 @@ void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); +/* When sending xdp_frame into the network stack, then there is no + * return point callback, which is needed to release e.g. DMA-mapping + * resources with page_pool. Thus, have explicit function to release + * frame resources. + */ +void __xdp_release_frame(void *data, struct xdp_mem_info *mem); +static inline void xdp_release_frame(struct xdp_frame *xdpf) +{ + struct xdp_mem_info *mem = &xdpf->mem; + + /* Curr only page_pool needs this */ + if (mem->type == MEM_TYPE_PAGE_POOL) + __xdp_release_frame(xdpf->data, mem); +} + int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h new file mode 100644 index 000000000000..6a8cba6ea79a --- /dev/null +++ b/include/net/xdp_priv.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_NET_XDP_PRIV_H__ +#define __LINUX_NET_XDP_PRIV_H__ + +#include <linux/rhashtable.h> + +/* Private to net/core/xdp.c, but used by trace/events/xdp.h */ +struct xdp_mem_allocator { + struct xdp_mem_info mem; + union { + void *allocator; + struct page_pool *page_pool; + struct zero_copy_allocator *zc_alloc; + }; + int disconnect_cnt; + unsigned long defer_start; + struct rhash_head node; + struct rcu_head rcu; + struct delayed_work defer_wq; + unsigned long defer_warn; +}; + +#endif /* __LINUX_NET_XDP_PRIV_H__ */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index d074b6d60f8a..69796d264f06 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -58,15 +58,22 @@ struct xdp_sock { struct xdp_umem *umem; struct list_head flush_node; u16 queue_id; - struct xsk_queue *tx ____cacheline_aligned_in_smp; - struct list_head list; bool zc; + enum { + XSK_READY = 0, + XSK_BOUND, + XSK_UNBOUND, + } state; /* Protects multiple processes in the control path */ struct mutex mutex; + struct xsk_queue *tx ____cacheline_aligned_in_smp; + struct list_head list; /* Mutual exclusion of NAPI TX thread and sendmsg error paths * in the SKB destructor callback. */ spinlock_t tx_completion_lock; + /* Protects generic receive. */ + spinlock_t rx_lock; u64 rx_dropped; }; @@ -77,10 +84,11 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); void xsk_flush(struct xdp_sock *xs); bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); /* Used from netdev driver */ +bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt); u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); -bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); +bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc); void xsk_umem_consume_tx_done(struct xdp_umem *umem); struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries); struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem, @@ -99,6 +107,16 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) } /* Reuse-queue aware version of FILL queue helpers */ +static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) +{ + struct xdp_umem_fq_reuse *rq = umem->fq_reuse; + + if (rq->length >= cnt) + return true; + + return xsk_umem_has_addrs(umem, cnt - rq->length); +} + static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { struct xdp_umem_fq_reuse *rq = umem->fq_reuse; @@ -146,6 +164,11 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) return false; } +static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt) +{ + return false; +} + static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { return NULL; @@ -159,8 +182,8 @@ static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) { } -static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, - u32 *len) +static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, + struct xdp_desc *desc) { return false; } @@ -200,6 +223,11 @@ static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) return 0; } +static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt) +{ + return false; +} + static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr) { return NULL; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a2907873ed56..b22db30c3d88 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -346,22 +346,19 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 portid); int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { - unsigned int family; - unsigned int proto; - __be16 eth_proto; - struct module *owner; - const struct xfrm_type *type_map[IPPROTO_MAX]; - const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX]; - - int (*init_flags)(struct xfrm_state *x); - void (*init_tempsel)(struct xfrm_selector *sel, - const struct flowi *fl); - void (*init_temprop)(struct xfrm_state *x, - const struct xfrm_tmpl *tmpl, - const xfrm_address_t *daddr, - const xfrm_address_t *saddr); - int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); - int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); + u8 family; + u8 proto; + + const struct xfrm_type_offload *type_offload_esp; + + const struct xfrm_type *type_esp; + const struct xfrm_type *type_ipip; + const struct xfrm_type *type_ipip6; + const struct xfrm_type *type_comp; + const struct xfrm_type *type_ah; + const struct xfrm_type *type_routing; + const struct xfrm_type *type_dstopts; + int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*output_finish)(struct sock *sk, struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, @@ -407,12 +404,10 @@ struct xfrm_type { int (*reject)(struct xfrm_state *, struct sk_buff *, const struct flowi *); int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); - /* Estimate maximal size of result of transformation of a dgram */ - u32 (*get_mtu)(struct xfrm_state *, int size); }; int xfrm_register_type(const struct xfrm_type *type, unsigned short family); -int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); +void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family); struct xfrm_type_offload { char *description; @@ -424,7 +419,7 @@ struct xfrm_type_offload { }; int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); -int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); +void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); static inline int xfrm_af2proto(unsigned int family) { @@ -1508,21 +1503,19 @@ struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark, u8 proto, unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY -int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family, struct net *net); -int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, +void xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family); +void xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family); #else -static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, - int n, unsigned short family, struct net *net) +static inline void xfrm_tmpl_sort(struct xfrm_tmpl **d, struct xfrm_tmpl **s, + int n, unsigned short family) { - return -ENOSYS; } -static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, - int n, unsigned short family) +static inline void xfrm_state_sort(struct xfrm_state **d, struct xfrm_state **s, + int n, unsigned short family) { - return -ENOSYS; } #endif @@ -1551,7 +1544,7 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); -int xfrm_state_mtu(struct xfrm_state *x, int mtu); +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); |