From 5716146295183651cfd89ae2ea46c5ef0d31faa8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:35 +0200 Subject: wifi: regulatory: remove extra documentation The struct member country_ie_checksum doesn't exist, so don't document it. Link: https://msgid.link/20240515093852.ebcc9673558b.Ie0b58c1249c6375c60859fa6474d7cdd8862b065@changeid Signed-off-by: Johannes Berg --- include/net/regulatory.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebf9e028d1ef..a103f4c8cf75 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -71,8 +71,6 @@ enum environment_cap { * CRDA and can be used by other regulatory requests. When a * the last request is not yet processed we must yield until it * is processed before processing any new requests. - * @country_ie_checksum: checksum of the last processed and accepted - * country IE * @country_ie_env: lets us know if the AP is telling us we are outdoor, * indoor, or if it doesn't matter * @list: used to insert into the reg_requests_list linked list -- cgit v1.2.3 From 3bb8dce41c023e39ec4d79a83742403b5064a276 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:36 +0200 Subject: wifi: ieee80211: add missing doc short descriptions Some structures erroneously don't have a short description, add the missing descriptions. Link: https://msgid.link/20240515093852.16f4355e918e.I940276a4fb006ada68ab1a3e6077e3229fff0f14@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index de2dce743ee2..713266ce48a7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1101,7 +1101,7 @@ enum ieee80211_vht_opmode_bits { }; /** - * enum ieee80211_s1g_chanwidth + * enum ieee80211_s1g_chanwidth - S1G channel widths * These are defined in IEEE802.11-2016ah Table 10-20 * as BSS Channel Width * @@ -4145,7 +4145,7 @@ enum ieee80211_idle_options { }; /** - * struct ieee80211_bss_max_idle_period_ie + * struct ieee80211_bss_max_idle_period_ie - BSS max idle period element struct * * This structure refers to "BSS Max idle period element" * @@ -4180,7 +4180,7 @@ enum ieee80211_sa_query_action { }; /** - * struct ieee80211_bssid_index + * struct ieee80211_bssid_index - multiple BSSID index element structure * * This structure refers to "Multiple BSSID-index element" * @@ -4195,7 +4195,8 @@ struct ieee80211_bssid_index { }; /** - * struct ieee80211_multiple_bssid_configuration + * struct ieee80211_multiple_bssid_configuration - multiple BSSID configuration + * element structure * * This structure refers to "Multiple BSSID Configuration element" * -- cgit v1.2.3 From 1c26f09b20bafc92e0de80e84942be77d4d3c61e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:37 +0200 Subject: wifi: radiotap: document ieee80211_get_radiotap_len() return value Document the return value of ieee80211_get_radiotap_len() in the proper kernel-doc format. Link: https://msgid.link/20240515093852.143aadfdb094.I8795ec1e8cfd7106d58325fb514bae92625fb45c@changeid Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 925bac726a92..91762faecc13 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -582,6 +582,7 @@ enum ieee80211_radiotap_eht_usig_tb { /** * ieee80211_get_radiotap_len - get radiotap header length * @data: pointer to the header + * Return: the radiotap header length */ static inline u16 ieee80211_get_radiotap_len(const char *data) { -- cgit v1.2.3 From 54856871298c9a8717450351699a8e6fe706101c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:38 +0200 Subject: wifi: ieee80211: remove ieee80211_next_tbtt_present() This is actually completely equivalent to the other function ieee80211_is_s1g_short_beacon(), but open-codes the logic. Implement the necessary logic in ieee80211_is_s1g_short_beacon() and remove ieee80211_next_tbtt_present(). Link: https://msgid.link/20240515093852.774ced74dea8.I152525b4cff6e6a25be6c48fe6a4b89f17bab8a9@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 713266ce48a7..72b351abb4f6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -604,25 +604,15 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) } /** - * ieee80211_next_tbtt_present - check if IEEE80211_FTYPE_EXT && - * IEEE80211_STYPE_S1G_BEACON && IEEE80211_S1G_BCN_NEXT_TBTT - * @fc: frame control bytes in little-endian byteorder - */ -static inline bool ieee80211_next_tbtt_present(__le16 fc) -{ - return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == - cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON) && - fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT); -} - -/** - * ieee80211_is_s1g_short_beacon - check if next tbtt present bit is set. Only - * true for S1G beacons when they're short. + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an S1G short beacon, + * i.e. it is an S1G beacon with 'next TBTT' flag set */ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) { - return ieee80211_is_s1g_beacon(fc) && ieee80211_next_tbtt_present(fc); + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); } /** -- cgit v1.2.3 From 3c75e99c7036a87f159dbd526060554afb3482f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:39 +0200 Subject: wifi: ieee80211: document function return values These are all missing, as pointed out when running kernel-doc. Add return value documentation and fix some small things while at it. Link: https://msgid.link/20240515093852.1cd5ad8f354d.Idc16e9767fa42de80b659c32efc58aea38c26996@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 106 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 72b351abb4f6..595f83783f0e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -373,6 +373,7 @@ struct ieee80211_trigger { /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has to-DS set */ static inline bool ieee80211_has_tods(__le16 fc) { @@ -382,6 +383,7 @@ static inline bool ieee80211_has_tods(__le16 fc) /** * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has from-DS set */ static inline bool ieee80211_has_fromds(__le16 fc) { @@ -391,6 +393,7 @@ static inline bool ieee80211_has_fromds(__le16 fc) /** * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not it's a 4-address frame (from-DS and to-DS set) */ static inline bool ieee80211_has_a4(__le16 fc) { @@ -401,6 +404,7 @@ static inline bool ieee80211_has_a4(__le16 fc) /** * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has more fragments (more frags bit set) */ static inline bool ieee80211_has_morefrags(__le16 fc) { @@ -410,6 +414,7 @@ static inline bool ieee80211_has_morefrags(__le16 fc) /** * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the retry flag is set */ static inline bool ieee80211_has_retry(__le16 fc) { @@ -419,6 +424,7 @@ static inline bool ieee80211_has_retry(__le16 fc) /** * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the power management flag is set */ static inline bool ieee80211_has_pm(__le16 fc) { @@ -428,6 +434,7 @@ static inline bool ieee80211_has_pm(__le16 fc) /** * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the more data flag is set */ static inline bool ieee80211_has_moredata(__le16 fc) { @@ -437,6 +444,7 @@ static inline bool ieee80211_has_moredata(__le16 fc) /** * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the protected flag is set */ static inline bool ieee80211_has_protected(__le16 fc) { @@ -446,6 +454,7 @@ static inline bool ieee80211_has_protected(__le16 fc) /** * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the order flag is set */ static inline bool ieee80211_has_order(__le16 fc) { @@ -455,6 +464,7 @@ static inline bool ieee80211_has_order(__le16 fc) /** * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is management */ static inline bool ieee80211_is_mgmt(__le16 fc) { @@ -465,6 +475,7 @@ static inline bool ieee80211_is_mgmt(__le16 fc) /** * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is control */ static inline bool ieee80211_is_ctl(__le16 fc) { @@ -475,6 +486,7 @@ static inline bool ieee80211_is_ctl(__le16 fc) /** * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a data frame */ static inline bool ieee80211_is_data(__le16 fc) { @@ -485,6 +497,7 @@ static inline bool ieee80211_is_data(__le16 fc) /** * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is extended */ static inline bool ieee80211_is_ext(__le16 fc) { @@ -496,6 +509,7 @@ static inline bool ieee80211_is_ext(__le16 fc) /** * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS data frame */ static inline bool ieee80211_is_data_qos(__le16 fc) { @@ -510,6 +524,8 @@ static inline bool ieee80211_is_data_qos(__le16 fc) /** * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS data frame that has data + * (i.e. is not null data) */ static inline bool ieee80211_is_data_present(__le16 fc) { @@ -524,6 +540,7 @@ static inline bool ieee80211_is_data_present(__le16 fc) /** * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an association request */ static inline bool ieee80211_is_assoc_req(__le16 fc) { @@ -534,6 +551,7 @@ static inline bool ieee80211_is_assoc_req(__le16 fc) /** * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an association response */ static inline bool ieee80211_is_assoc_resp(__le16 fc) { @@ -544,6 +562,7 @@ static inline bool ieee80211_is_assoc_resp(__le16 fc) /** * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a reassociation request */ static inline bool ieee80211_is_reassoc_req(__le16 fc) { @@ -554,6 +573,7 @@ static inline bool ieee80211_is_reassoc_req(__le16 fc) /** * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a reassociation response */ static inline bool ieee80211_is_reassoc_resp(__le16 fc) { @@ -564,6 +584,7 @@ static inline bool ieee80211_is_reassoc_resp(__le16 fc) /** * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a probe request */ static inline bool ieee80211_is_probe_req(__le16 fc) { @@ -574,6 +595,7 @@ static inline bool ieee80211_is_probe_req(__le16 fc) /** * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a probe response */ static inline bool ieee80211_is_probe_resp(__le16 fc) { @@ -584,6 +606,7 @@ static inline bool ieee80211_is_probe_resp(__le16 fc) /** * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a (regular, not S1G) beacon */ static inline bool ieee80211_is_beacon(__le16 fc) { @@ -595,6 +618,7 @@ static inline bool ieee80211_is_beacon(__le16 fc) * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT && * IEEE80211_STYPE_S1G_BEACON * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an S1G beacon */ static inline bool ieee80211_is_s1g_beacon(__le16 fc) { @@ -618,6 +642,7 @@ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an ATIM frame */ static inline bool ieee80211_is_atim(__le16 fc) { @@ -628,6 +653,7 @@ static inline bool ieee80211_is_atim(__le16 fc) /** * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a disassociation frame */ static inline bool ieee80211_is_disassoc(__le16 fc) { @@ -638,6 +664,7 @@ static inline bool ieee80211_is_disassoc(__le16 fc) /** * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an authentication frame */ static inline bool ieee80211_is_auth(__le16 fc) { @@ -648,6 +675,7 @@ static inline bool ieee80211_is_auth(__le16 fc) /** * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a deauthentication frame */ static inline bool ieee80211_is_deauth(__le16 fc) { @@ -658,6 +686,7 @@ static inline bool ieee80211_is_deauth(__le16 fc) /** * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an action frame */ static inline bool ieee80211_is_action(__le16 fc) { @@ -668,6 +697,7 @@ static inline bool ieee80211_is_action(__le16 fc) /** * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a block-ACK request frame */ static inline bool ieee80211_is_back_req(__le16 fc) { @@ -678,6 +708,7 @@ static inline bool ieee80211_is_back_req(__le16 fc) /** * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a block-ACK frame */ static inline bool ieee80211_is_back(__le16 fc) { @@ -688,6 +719,7 @@ static inline bool ieee80211_is_back(__le16 fc) /** * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a PS-poll frame */ static inline bool ieee80211_is_pspoll(__le16 fc) { @@ -698,6 +730,7 @@ static inline bool ieee80211_is_pspoll(__le16 fc) /** * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an RTS frame */ static inline bool ieee80211_is_rts(__le16 fc) { @@ -708,6 +741,7 @@ static inline bool ieee80211_is_rts(__le16 fc) /** * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CTS frame */ static inline bool ieee80211_is_cts(__le16 fc) { @@ -718,6 +752,7 @@ static inline bool ieee80211_is_cts(__le16 fc) /** * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an ACK frame */ static inline bool ieee80211_is_ack(__le16 fc) { @@ -728,6 +763,7 @@ static inline bool ieee80211_is_ack(__le16 fc) /** * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CF-end frame */ static inline bool ieee80211_is_cfend(__le16 fc) { @@ -738,6 +774,7 @@ static inline bool ieee80211_is_cfend(__le16 fc) /** * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CF-end-ack frame */ static inline bool ieee80211_is_cfendack(__le16 fc) { @@ -748,6 +785,7 @@ static inline bool ieee80211_is_cfendack(__le16 fc) /** * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a nullfunc frame */ static inline bool ieee80211_is_nullfunc(__le16 fc) { @@ -758,6 +796,7 @@ static inline bool ieee80211_is_nullfunc(__le16 fc) /** * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS nullfunc frame */ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { @@ -768,6 +807,7 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) /** * ieee80211_is_trigger - check if frame is trigger frame * @fc: frame control field in little-endian byteorder + * Return: whether or not the frame is a trigger frame */ static inline bool ieee80211_is_trigger(__le16 fc) { @@ -778,6 +818,7 @@ static inline bool ieee80211_is_trigger(__le16 fc) /** * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a nullfunc or QoS nullfunc frame */ static inline bool ieee80211_is_any_nullfunc(__le16 fc) { @@ -787,6 +828,8 @@ static inline bool ieee80211_is_any_nullfunc(__le16 fc) /** * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder + * Return: whether or not the frame is the first fragment (also true if + * it's not fragmented at all) */ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) { @@ -796,6 +839,7 @@ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) /** * ieee80211_is_frag - check if a frame is a fragment * @hdr: 802.11 header of the frame + * Return: whether or not the frame is a fragment */ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) { @@ -2349,6 +2393,8 @@ struct ieee80211_eht_operation_info { * @max_vht_nss: current maximum NSS as advertised by the STA in * operating mode notification, can be 0 in which case the * capability data will be used to derive this (from MCS support) + * Return: The maximum NSS that can be used for the given bandwidth/MCS + * combination * * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can * vary for a given BW/MCS. This function parses the data. @@ -4317,6 +4363,7 @@ struct ieee80211_he_6ghz_capa { /** * ieee80211_get_qos_ctl - get pointer to qos control bytes * @hdr: the frame + * Return: a pointer to the QoS control field in the frame header * * The qos ctrl bytes come after the frame_control, duration, seq_num * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose @@ -4339,6 +4386,7 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) /** * ieee80211_get_tid - get qos TID * @hdr: the frame + * Return: the TID from the QoS control field */ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) { @@ -4350,6 +4398,7 @@ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) /** * ieee80211_get_SA - get pointer to SA * @hdr: the frame + * Return: a pointer to the source address (SA) * * Given an 802.11 frame, this function returns the offset * to the source address (SA). It does not verify that the @@ -4369,6 +4418,7 @@ static inline u8 *ieee80211_get_SA(struct ieee80211_hdr *hdr) /** * ieee80211_get_DA - get pointer to DA * @hdr: the frame + * Return: a pointer to the destination address (DA) * * Given an 802.11 frame, this function returns the offset * to the destination address (DA). It does not verify that @@ -4387,6 +4437,7 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @skb: the skb to check, starting with the 802.11 header + * Return: whether or not the MMPDU is bufferable */ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) { @@ -4425,6 +4476,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) /** * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame * @hdr: the frame (buffer must include at least the first octet of payload) + * Return: whether or not the frame is a robust management frame */ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) { @@ -4461,6 +4513,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) /** * ieee80211_is_robust_mgmt_frame - check if skb contains a robust mgmt frame * @skb: the skb containing the frame, length will be checked + * Return: whether or not the frame is a robust management frame */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { @@ -4473,6 +4526,7 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) * ieee80211_is_public_action - check if frame is a public action frame * @hdr: the frame * @len: length of the frame + * Return: whether or not the frame is a public action frame */ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, size_t len) @@ -4518,8 +4572,9 @@ ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) /** * _ieee80211_is_group_privacy_action - check if frame is a group addressed - * privacy action frame + * privacy action frame * @hdr: the frame + * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) { @@ -4535,8 +4590,9 @@ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) /** * ieee80211_is_group_privacy_action - check if frame is a group addressed - * privacy action frame + * privacy action frame * @skb: the skb containing the frame, length will be checked + * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) { @@ -4548,6 +4604,7 @@ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs + * Return: the time value converted to microseconds */ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) { @@ -4559,6 +4616,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) * @tim: the TIM IE * @tim_len: length of the TIM IE * @aid: the AID to look for + * Return: whether or not traffic is indicated in the TIM for the given AID */ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) @@ -4585,8 +4643,10 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, } /** - * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) + * ieee80211_get_tdls_action - get TDLS action code * @skb: the skb containing the frame, length will not be checked + * Return: the TDLS action code, or -1 if it's not an encapsulated TDLS action + * frame * * This function assumes the frame is a data frame, and that the network header * is in the correct place. @@ -4626,6 +4686,7 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb) /** * ieee80211_action_contains_tpc - checks if the frame contains TPC element * @skb: the skb containing the frame, length will be checked + * Return: %true if the frame contains a TPC element, %false otherwise * * This function checks if it's either TPC report action frame or Link * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 @@ -4743,6 +4804,7 @@ struct element { * @element: element pointer after for_each_element() or friends * @data: same data pointer as passed to for_each_element() or friends * @datalen: same data length as passed to for_each_element() or friends + * Return: %true if all elements were iterated, %false otherwise; see notes * * This function returns %true if all the data was parsed or considered * while walking the elements. Only use this if your for_each_element() @@ -4946,6 +5008,7 @@ struct ieee80211_mle_tdls_common_info { * ieee80211_mle_common_size - check multi-link element common size * @data: multi-link element, must already be checked for size using * ieee80211_mle_size_ok() + * Return: the size of the multi-link element's "common" subfield */ static inline u8 ieee80211_mle_common_size(const u8 *data) { @@ -4978,11 +5041,10 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) /** * ieee80211_mle_get_link_id - returns the link ID * @data: the basic multi link element + * Return: the link ID, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the BSS link ID can't be found, -1 will be returned */ static inline int ieee80211_mle_get_link_id(const u8 *data) { @@ -5002,12 +5064,10 @@ static inline int ieee80211_mle_get_link_id(const u8 *data) /** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count * @data: pointer to the basic multi link element + * Return: the BSS Parameter Change Count field value, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), -1 will be returned. */ static inline int ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) @@ -5030,13 +5090,13 @@ ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) /** * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the medium synchronization delay field value from the multi-link + * element, or the default value (%IEEE80211_MED_SYNC_DELAY_DEFAULT) + * if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the medium synchronization is not present, then the default value is - * returned. */ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) { @@ -5060,12 +5120,12 @@ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) /** * ieee80211_mle_get_eml_cap - returns the EML capability - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the EML capability field value from the multi-link element, + * or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the EML capability is not present, 0 will be returned. */ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) { @@ -5091,13 +5151,12 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) /** * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the MLD capabilities and operations field value from the multi-link + * element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the MLD capabilities and operations field is not present, 0 will be - * returned. */ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) { @@ -5128,12 +5187,11 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) /** * ieee80211_mle_get_mld_id - returns the MLD ID - * @data: pointer to the multi link element + * @data: pointer to the multi-link element + * Return: The MLD ID in the given multi-link element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the MLD ID is not present, 0 will be returned. */ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) { @@ -5168,6 +5226,7 @@ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element + * Return: whether or not the multi-link element size is OK */ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { @@ -5237,6 +5296,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) * @data: pointer to the element data * @type: expected type of the element * @len: length of the containing element + * Return: whether or not the multi-link element type matches and size is OK */ static inline bool ieee80211_mle_type_ok(const u8 *data, u8 type, size_t len) { @@ -5280,6 +5340,7 @@ struct ieee80211_mle_per_sta_profile { * profile size * @data: pointer to the sub element data * @len: length of the containing sub element + * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_basic_sta_prof_size_ok(const u8 *data, size_t len) @@ -5364,6 +5425,7 @@ ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta * element sta profile size. * @data: pointer to the sub element data * @len: length of the containing sub element + * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, size_t len) -- cgit v1.2.3 From 9d222c12834d80849afbad7b42822b8ffbbc025f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:40 +0200 Subject: wifi: ieee80211: document two FTM related functions Add some documentation to ieee80211_is_timing_measurement() and ieee80211_is_ftm(). Link: https://msgid.link/20240515093852.229aa69e972c.Ifae6762a698e79cd5a49a055fe4c32330e826200@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 595f83783f0e..fb50a99daa93 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4731,6 +4731,11 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) return true; } +/** + * ieee80211_is_timing_measurement - check if frame is timing measurement response + * @skb: the SKB to check + * Return: whether or not the frame is a valid timing measurement response + */ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; @@ -4750,6 +4755,11 @@ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) return false; } +/** + * ieee80211_is_ftm - check if frame is FTM response + * @skb: the SKB to check + * Return: whether or not the frame is a valid FTM response action frame + */ static inline bool ieee80211_is_ftm(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; -- cgit v1.2.3 From 8592fd7ccc95a7cf222985e6297041463e8b4e9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:54 +0200 Subject: wifi: ieee80211/ath11k: remove IEEE80211_MAX_NUM_PWR_LEVEL The define IEEE80211_MAX_NUM_PWR_LEVEL doesn't make much sense. Yes, that table has a maximum value of 8, and the table will actually remain that way, but EHT introduced a way to encode more levels for 320 MHz channels. Remove IEEE80211_MAX_NUM_PWR_LEVEL and, for ath11k being the only user, add ATH11K_NUM_PWR_LEVELS, where it makes sense since it cannot support 320 MHz channels. Acked-by: Kalle Valo Link: https://msgid.link/20240506214536.9818e5471055.Icece7e47e963d6b68e0d97ba13c102b37fbaa689@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/core.h | 9 ++++++--- drivers/net/wireless/ath/ath11k/mac.c | 2 +- include/linux/ieee80211.h | 5 ----- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 205f40ee6b66..df24f0e409af 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -330,6 +330,9 @@ struct ath11k_chan_power_info { s8 tx_power; }; +/* ath11k only deals with 160 MHz, so 8 subchannels */ +#define ATH11K_NUM_PWR_LEVELS 8 + /** * struct ath11k_reg_tpc_power_info - regulatory TPC power info * @is_psd_power: is PSD power or not @@ -346,10 +349,10 @@ struct ath11k_reg_tpc_power_info { u8 eirp_power; enum wmi_reg_6ghz_ap_type ap_power_type; u8 num_pwr_levels; - u8 reg_max[IEEE80211_MAX_NUM_PWR_LEVEL]; + u8 reg_max[ATH11K_NUM_PWR_LEVELS]; u8 ap_constraint_power; - s8 tpe[IEEE80211_MAX_NUM_PWR_LEVEL]; - struct ath11k_chan_power_info chan_power_info[IEEE80211_MAX_NUM_PWR_LEVEL]; + s8 tpe[ATH11K_NUM_PWR_LEVELS]; + struct ath11k_chan_power_info chan_power_info[ATH11K_NUM_PWR_LEVELS]; }; struct ath11k_vif { diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 4f62e38ba48b..59e5e8085742 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -7688,7 +7688,7 @@ void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar, struct ieee80211_channel *chan, *temp_chan; u8 pwr_lvl_idx, num_pwr_levels, pwr_reduction; bool is_psd_power = false, is_tpe_present = false; - s8 max_tx_power[IEEE80211_MAX_NUM_PWR_LEVEL], + s8 max_tx_power[ATH11K_NUM_PWR_LEVELS], psd_power, tx_power; s8 eirp_power = 0; u16 start_freq, center_freq; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fb50a99daa93..1c3a683a3ee2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2833,11 +2833,6 @@ struct ieee80211_he_6ghz_oper { * So it it totally max 8 Transmit Power Envelope element. */ #define IEEE80211_TPE_MAX_IE_COUNT 8 -/* - * In "Table 9-277—Meaning of Maximum Transmit Power Count subfield" - * of "IEEE Std 802.11ax™‐2021", the max power level is 8. - */ -#define IEEE80211_MAX_NUM_PWR_LEVEL 8 #define IEEE80211_TPE_MAX_POWER_COUNT 8 -- cgit v1.2.3 From 39dc8b8ea387ce7f4fe2d2d6d550ed52aa9aa040 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:56 +0200 Subject: wifi: mac80211: pass parsed TPE data to drivers Instead of passing the full TPE elements, in all their glory and mixed up data formats for HE backward compatibility, parse them fully into the right values, and pass that to the drivers. Also introduce proper validation already in mac80211, so that drivers don't need to do it, and parse the EHT portions. The code now passes the values in the right order according to the channel used by an interface, which could also be a subset of the data advertised by the AP, if we couldn't connect with the full bandwidth (for whatever reason.) Also add kunit tests for the more complicated bits of it. Reviewed-by: Miriam Rachel Korenblit Acked-by: Kalle Valo Link: https://msgid.link/20240506214536.2aa839969b60.I265b28209e0b29772b2f125f7f83de44a4da877b@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 143 ++++------------- include/linux/ieee80211.h | 102 ++++++++++-- include/net/mac80211.h | 41 ++++- net/mac80211/ieee80211_i.h | 13 +- net/mac80211/mlme.c | 230 +++++++++++++++++++++++---- net/mac80211/parse.c | 92 +++++++++-- net/mac80211/tests/Makefile | 2 +- net/mac80211/tests/tpe.c | 284 ++++++++++++++++++++++++++++++++++ net/mac80211/util.c | 25 +++ 9 files changed, 761 insertions(+), 171 deletions(-) create mode 100644 net/mac80211/tests/tpe.c (limited to 'include') diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 73876760afe9..d4225bf30e51 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -7507,32 +7507,6 @@ static int ath11k_mac_stop_vdev_early(struct ieee80211_hw *hw, return 0; } -static u8 ath11k_mac_get_tpe_count(u8 txpwr_intrprt, u8 txpwr_cnt) -{ - switch (txpwr_intrprt) { - /* Refer "Table 9-276-Meaning of Maximum Transmit Power Count subfield - * if the Maximum Transmit Power Interpretation subfield is 0 or 2" of - * "IEEE Std 802.11ax 2021". - */ - case IEEE80211_TPE_LOCAL_EIRP: - case IEEE80211_TPE_REG_CLIENT_EIRP: - txpwr_cnt = txpwr_cnt <= 3 ? txpwr_cnt : 3; - txpwr_cnt = txpwr_cnt + 1; - break; - /* Refer "Table 9-277-Meaning of Maximum Transmit Power Count subfield - * if Maximum Transmit Power Interpretation subfield is 1 or 3" of - * "IEEE Std 802.11ax 2021". - */ - case IEEE80211_TPE_LOCAL_EIRP_PSD: - case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: - txpwr_cnt = txpwr_cnt <= 4 ? txpwr_cnt : 4; - txpwr_cnt = txpwr_cnt ? (BIT(txpwr_cnt - 1)) : 1; - break; - } - - return txpwr_cnt; -} - static u8 ath11k_mac_get_num_pwr_levels(struct cfg80211_chan_def *chan_def) { if (chan_def->chan->flags & IEEE80211_CHAN_PSD) { @@ -7859,33 +7833,23 @@ static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar, struct ath11k_base *ab = ar->ab; struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; - struct ieee80211_tx_pwr_env *single_tpe; + struct ieee80211_parsed_tpe_eirp *non_psd = NULL; + struct ieee80211_parsed_tpe_psd *psd = NULL; enum wmi_reg_6ghz_client_type client_type; struct cur_regulatory_info *reg_info; + u8 local_tpe_count, reg_tpe_count; + bool use_local_tpe; int i; - u8 pwr_count, pwr_interpret, pwr_category; - u8 psd_index = 0, non_psd_index = 0, local_tpe_count = 0, reg_tpe_count = 0; - bool use_local_tpe, non_psd_set = false, psd_set = false; reg_info = &ab->reg_info_store[ar->pdev_idx]; client_type = reg_info->client_type; - for (i = 0; i < bss_conf->tx_pwr_env_num; i++) { - single_tpe = &bss_conf->tx_pwr_env[i]; - pwr_category = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_CATEGORY); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - - if (pwr_category == client_type) { - if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP || - pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD) - local_tpe_count++; - else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP || - pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD) - reg_tpe_count++; - } - } + local_tpe_count = + bss_conf->tpe.max_local[client_type].valid + + bss_conf->tpe.psd_local[client_type].valid; + reg_tpe_count = + bss_conf->tpe.max_reg_client[client_type].valid + + bss_conf->tpe.psd_reg_client[client_type].valid; if (!reg_tpe_count && !local_tpe_count) { ath11k_warn(ab, @@ -7898,83 +7862,44 @@ static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar, use_local_tpe = false; } - for (i = 0; i < bss_conf->tx_pwr_env_num; i++) { - single_tpe = &bss_conf->tx_pwr_env[i]; - pwr_category = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_CATEGORY); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - - if (pwr_category != client_type) - continue; - - /* get local transmit power envelope */ - if (use_local_tpe) { - if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP) { - non_psd_index = i; - non_psd_set = true; - } else if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD) { - psd_index = i; - psd_set = true; - } - /* get regulatory transmit power envelope */ - } else { - if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP) { - non_psd_index = i; - non_psd_set = true; - } else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD) { - psd_index = i; - psd_set = true; - } - } + if (use_local_tpe) { + psd = &bss_conf->tpe.psd_local[client_type]; + if (!psd->valid) + psd = NULL; + non_psd = &bss_conf->tpe.max_local[client_type]; + if (!non_psd->valid) + non_psd = NULL; + } else { + psd = &bss_conf->tpe.psd_reg_client[client_type]; + if (!psd->valid) + psd = NULL; + non_psd = &bss_conf->tpe.max_reg_client[client_type]; + if (!non_psd->valid) + non_psd = NULL; } - if (non_psd_set && !psd_set) { - single_tpe = &bss_conf->tx_pwr_env[non_psd_index]; - pwr_count = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_COUNT); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + if (non_psd && !psd) { arvif->reg_tpc_info.is_psd_power = false; arvif->reg_tpc_info.eirp_power = 0; - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_tpe_count(pwr_interpret, pwr_count); + arvif->reg_tpc_info.num_pwr_levels = non_psd->count; for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { ath11k_dbg(ab, ATH11K_DBG_MAC, "non PSD power[%d] : %d\n", - i, single_tpe->tx_power[i]); - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2; + i, non_psd->power[i]); + arvif->reg_tpc_info.tpe[i] = non_psd->power[i] / 2; } } - if (psd_set) { - single_tpe = &bss_conf->tx_pwr_env[psd_index]; - pwr_count = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_COUNT); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - arvif->reg_tpc_info.is_psd_power = true; + if (psd) { + arvif->reg_tpc_info.num_pwr_levels = psd->count; - if (pwr_count == 0) { + for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { ath11k_dbg(ab, ATH11K_DBG_MAC, - "TPE PSD power : %d\n", single_tpe->tx_power[0]); - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_num_pwr_levels(&ctx->def); - - for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[0] / 2; - } else { - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_tpe_count(pwr_interpret, pwr_count); - - for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { - ath11k_dbg(ab, ATH11K_DBG_MAC, - "TPE PSD power[%d] : %d\n", - i, single_tpe->tx_power[i]); - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2; - } + "TPE PSD power[%d] : %d\n", + i, psd->power[i]); + arvif->reg_tpc_info.tpe[i] = psd->power[i] / 2; } } } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1c3a683a3ee2..769008a51809 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2825,17 +2825,6 @@ struct ieee80211_he_6ghz_oper { u8 minrate; } __packed; -/* - * In "9.4.2.161 Transmit Power Envelope element" of "IEEE Std 802.11ax-2021", - * it show four types in "Table 9-275a-Maximum Transmit Power Interpretation - * subfield encoding", and two category for each type in "Table E-12-Regulatory - * Info subfield encoding in the United States". - * So it it totally max 8 Transmit Power Envelope element. - */ -#define IEEE80211_TPE_MAX_IE_COUNT 8 - -#define IEEE80211_TPE_MAX_POWER_COUNT 8 - /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, @@ -2844,24 +2833,107 @@ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_REG_CLIENT_EIRP_PSD, }; +/* category type of transmit power envelope element */ +enum ieee80211_tx_power_category_6ghz { + IEEE80211_TPE_CAT_6GHZ_DEFAULT = 0, + IEEE80211_TPE_CAT_6GHZ_SUBORDINATE = 1, +}; + +/* + * For IEEE80211_TPE_LOCAL_EIRP / IEEE80211_TPE_REG_CLIENT_EIRP, + * setting to 63.5 dBm means no constraint. + */ +#define IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT 127 + +/* + * For IEEE80211_TPE_LOCAL_EIRP_PSD / IEEE80211_TPE_REG_CLIENT_EIRP_PSD, + * setting to 127 indicates no PSD limit for the 20 MHz channel. + */ +#define IEEE80211_TPE_PSD_NO_LIMIT 127 + /** * struct ieee80211_tx_pwr_env - Transmit Power Envelope - * @tx_power_info: Transmit Power Information field - * @tx_power: Maximum Transmit Power field + * @info: Transmit Power Information field + * @variable: Maximum Transmit Power field * * This structure represents the payload of the "Transmit Power * Envelope element" as described in IEEE Std 802.11ax-2021 section * 9.4.2.161 */ struct ieee80211_tx_pwr_env { - u8 tx_power_info; - s8 tx_power[IEEE80211_TPE_MAX_POWER_COUNT]; + u8 info; + u8 variable[]; } __packed; #define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7 #define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38 #define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0 +#define IEEE80211_TX_PWR_ENV_EXT_COUNT 0xF + +static inline bool ieee80211_valid_tpe_element(const u8 *data, u8 len) +{ + const struct ieee80211_tx_pwr_env *env = (const void *)data; + u8 count, interpret, category; + u8 needed = sizeof(*env); + u8 N; /* also called N in the spec */ + + if (len < needed) + return false; + + count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); + interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); + + switch (category) { + case IEEE80211_TPE_CAT_6GHZ_DEFAULT: + case IEEE80211_TPE_CAT_6GHZ_SUBORDINATE: + break; + default: + return false; + } + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + case IEEE80211_TPE_REG_CLIENT_EIRP: + if (count > 3) + return false; + + /* count == 0 encodes 1 value for 20 MHz, etc. */ + needed += count + 1; + + if (len < needed) + return false; + + /* there can be extension fields not accounted for in 'count' */ + + return true; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + if (count > 4) + return false; + + N = count ? 1 << (count - 1) : 1; + needed += N; + + if (len < needed) + return false; + + if (len > needed) { + u8 K = u8_get_bits(env->variable[N], + IEEE80211_TX_PWR_ENV_EXT_COUNT); + + needed += 1 + K; + if (len < needed) + return false; + } + + return true; + } + + return false; +} + /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size * @he_oper_ie: byte data of the He Operations IE, stating from the byte diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cafc664ee531..a4efbfb8d796 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -550,6 +550,39 @@ struct ieee80211_fils_discovery { u32 max_interval; }; +#define IEEE80211_TPE_EIRP_ENTRIES_320MHZ 5 +struct ieee80211_parsed_tpe_eirp { + bool valid; + s8 power[IEEE80211_TPE_EIRP_ENTRIES_320MHZ]; + u8 count; +}; + +#define IEEE80211_TPE_PSD_ENTRIES_320MHZ 16 +struct ieee80211_parsed_tpe_psd { + bool valid; + s8 power[IEEE80211_TPE_PSD_ENTRIES_320MHZ]; + u8 count, n; +}; + +/** + * struct ieee80211_parsed_tpe - parsed transmit power envelope information + * @max_local: maximum local EIRP, one value for 20, 40, 80, 160, 320 MHz each + * (indexed by TX power category) + * @max_reg_client: maximum regulatory client EIRP, one value for 20, 40, 80, + * 160, 320 MHz each + * (indexed by TX power category) + * @psd_local: maximum local power spectral density, one value for each 20 MHz + * subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + * @psd_reg_client: maximum regulatory power spectral density, one value for + * each 20 MHz subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + */ +struct ieee80211_parsed_tpe { + struct ieee80211_parsed_tpe_eirp max_local[2], max_reg_client[2]; + struct ieee80211_parsed_tpe_psd psd_local[2], psd_reg_client[2]; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -662,8 +695,7 @@ struct ieee80211_fils_discovery { * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. * @power_type: power type of BSS for 6 GHz - * @tx_pwr_env: transmit power envelope array of BSS. - * @tx_pwr_env_num: number of @tx_pwr_env. + * @tpe: transmit power envelope information * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @csa_active: marks whether a channel switch is going on. @@ -766,8 +798,9 @@ struct ieee80211_bss_conf { u32 unsol_bcast_probe_resp_interval; struct cfg80211_bitrate_mask beacon_tx_rate; enum ieee80211_ap_reg_power power_type; - struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; - u8 tx_pwr_env_num; + + struct ieee80211_parsed_tpe tpe; + u8 pwr_reduction; bool eht_support; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb62b7d4b4f7..7d541a2355f6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1708,7 +1709,6 @@ struct ieee802_11_elems { const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; - const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; const u8 *uora_element; const u8 *mesh_id; const u8 *peering; @@ -1746,6 +1746,9 @@ struct ieee802_11_elems { const struct ieee80211_bandwidth_indication *bandwidth_indication; const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; + /* not the order in the psd values is per element, not per chandef */ + struct ieee80211_parsed_tpe tpe; + /* length of them, respectively */ u8 ext_capab_len; u8 ssid_len; @@ -1764,8 +1767,6 @@ struct ieee802_11_elems { u8 perr_len; u8 country_elem_len; u8 bssid_index_len; - u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT]; - u8 tx_pwr_env_num; u8 eht_cap_len; /* mult-link element can be de-fragmented and thus u8 is not sufficient */ @@ -2243,6 +2244,7 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len, void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params *qparam, int ac); +void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe); void ieee80211_set_wmm_default(struct ieee80211_link_data *link, bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, @@ -2681,6 +2683,11 @@ void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_d #define VISIBLE_IF_MAC80211_KUNIT ieee80211_rx_result ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx); +int ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap, + u8 n_partial_subchans); +void ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used); #else #define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) #define VISIBLE_IF_MAC80211_KUNIT static diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e410a43dc681..ed9851faac05 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -606,11 +606,195 @@ static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata, return true; } +static int ieee80211_chandef_num_subchans(const struct cfg80211_chan_def *c) +{ + if (c->width == NL80211_CHAN_WIDTH_80P80) + return 4 + 4; + + return nl80211_chan_width_to_mhz(c->width) / 20; +} + +static int ieee80211_chandef_num_widths(const struct cfg80211_chan_def *c) +{ + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + return 1; + case NL80211_CHAN_WIDTH_40: + return 2; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + return 3; + case NL80211_CHAN_WIDTH_160: + return 4; + case NL80211_CHAN_WIDTH_320: + return 5; + default: + WARN_ON(1); + return 0; + } +} + +VISIBLE_IF_MAC80211_KUNIT int +ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap, + u8 n_partial_subchans) +{ + int n = ieee80211_chandef_num_subchans(ap); + struct cfg80211_chan_def tmp = *ap; + int offset = 0; + + /* + * Given a chandef (in this context, it's the AP's) and a number + * of subchannels that we want to look at ('n_partial_subchans'), + * calculate the offset in number of subchannels between the full + * and the subset with the desired width. + */ + + /* same number of subchannels means no offset, obviously */ + if (n == n_partial_subchans) + return 0; + + /* don't WARN - misconfigured APs could cause this if their N > width */ + if (n < n_partial_subchans) + return 0; + + while (ieee80211_chandef_num_subchans(&tmp) > n_partial_subchans) { + u32 prev = tmp.center_freq1; + + ieee80211_chandef_downgrade(&tmp, NULL); + + /* + * if center_freq moved up, half the original channels + * are gone now but were below, so increase offset + */ + if (prev < tmp.center_freq1) + offset += ieee80211_chandef_num_subchans(&tmp); + } + + /* + * 80+80 with secondary 80 below primary - four subchannels for it + * (we cannot downgrade *to* 80+80, so no need to consider 'tmp') + */ + if (ap->width == NL80211_CHAN_WIDTH_80P80 && + ap->center_freq2 < ap->center_freq1) + offset += 4; + + return offset; +} +EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_calc_chandef_subchan_offset); + +VISIBLE_IF_MAC80211_KUNIT void +ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used) +{ + u8 needed = ieee80211_chandef_num_subchans(used); + u8 have = ieee80211_chandef_num_subchans(ap); + u8 tmp[IEEE80211_TPE_PSD_ENTRIES_320MHZ]; + u8 offset; + + if (!psd->valid) + return; + + /* if N is zero, all defaults were used, no point in rearranging */ + if (!psd->n) + goto out; + + BUILD_BUG_ON(sizeof(tmp) != sizeof(psd->power)); + + /* + * This assumes that 'N' is consistent with the HE channel, as + * it should be (otherwise the AP is broken). + * + * In psd->power we have values in the order 0..N, 0..K, where + * N+K should cover the entire channel per 'ap', but even if it + * doesn't then we've pre-filled 'unlimited' as defaults. + * + * But this is all the wrong order, we want to have them in the + * order of the 'used' channel. + * + * So for example, we could have a 320 MHz EHT AP, which has the + * HE channel as 80 MHz (e.g. due to puncturing, which doesn't + * seem to be considered for the TPE), as follows: + * + * EHT 320: | | | | | | | | | | | | | | | | | + * HE 80: | | | | | + * used 160: | | | | | | | | | + * + * N entries: |--|--|--|--| + * K entries: |--|--|--|--|--|--|--|--| |--|--|--|--| + * power idx: 4 5 6 7 8 9 10 11 0 1 2 3 12 13 14 15 + * full chan: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + * used chan: 0 1 2 3 4 5 6 7 + * + * The idx in the power array ('power idx') is like this since it + * comes directly from the element's N and K entries in their + * element order, and those are this way for HE compatibility. + * + * Rearrange them as desired here, first by putting them into the + * 'full chan' order, and then selecting the necessary subset for + * the 'used chan'. + */ + + /* first reorder according to AP channel */ + offset = ieee80211_calc_chandef_subchan_offset(ap, psd->n); + for (int i = 0; i < have; i++) { + if (i < offset) + tmp[i] = psd->power[i + psd->n]; + else if (i < offset + psd->n) + tmp[i] = psd->power[i - offset]; + else + tmp[i] = psd->power[i]; + } + + /* + * and then select the subset for the used channel + * (set everything to defaults first in case a driver is confused) + */ + memset(psd->power, IEEE80211_TPE_PSD_NO_LIMIT, sizeof(psd->power)); + offset = ieee80211_calc_chandef_subchan_offset(ap, needed); + for (int i = 0; i < needed; i++) + psd->power[i] = tmp[offset + i]; + +out: + /* limit, but don't lie if there are defaults in the data */ + if (needed < psd->count) + psd->count = needed; +} +EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_rearrange_tpe_psd); + +static void ieee80211_rearrange_tpe(struct ieee80211_parsed_tpe *tpe, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used) +{ + /* ignore this completely for narrow/invalid channels */ + if (!ieee80211_chandef_num_subchans(ap) || + !ieee80211_chandef_num_subchans(used)) { + ieee80211_clear_tpe(tpe); + return; + } + + for (int i = 0; i < 2; i++) { + int needed_pwr_count; + + ieee80211_rearrange_tpe_psd(&tpe->psd_local[i], ap, used); + ieee80211_rearrange_tpe_psd(&tpe->psd_reg_client[i], ap, used); + + /* limit this to the widths we actually need */ + needed_pwr_count = ieee80211_chandef_num_widths(used); + if (needed_pwr_count < tpe->max_local[i].count) + tpe->max_local[i].count = needed_pwr_count; + if (needed_pwr_count < tpe->max_reg_client[i].count) + tpe->max_reg_client[i].count = needed_pwr_count; + } +} + static struct ieee802_11_elems * ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_bss *cbss, int link_id, - struct ieee80211_chan_req *chanreq) + struct ieee80211_chan_req *chanreq, + struct cfg80211_chan_def *ap_chandef) { const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies); struct ieee80211_bss *bss = (void *)cbss->priv; @@ -623,7 +807,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, }; struct ieee802_11_elems *elems; struct ieee80211_supported_band *sband; - struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; int ret; @@ -634,7 +817,7 @@ again: return ERR_PTR(-ENOMEM); ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info, - elems, false, conn, &ap_chandef); + elems, false, conn, ap_chandef); /* this should be impossible since parsing depends on our mode */ if (WARN_ON(ap_mode > conn->mode)) { @@ -701,12 +884,12 @@ again: break; } - chanreq->oper = ap_chandef; + chanreq->oper = *ap_chandef; /* wider-bandwidth OFDMA is only done in EHT */ if (conn->mode >= IEEE80211_CONN_MODE_EHT && !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)) - chanreq->ap = ap_chandef; + chanreq->ap = *ap_chandef; else chanreq->ap.chan = NULL; @@ -738,7 +921,7 @@ again: IEEE80211_CONN_BW_LIMIT_160); } - if (chanreq->oper.width != ap_chandef.width || ap_mode != conn->mode) + if (chanreq->oper.width != ap_chandef->width || ap_mode != conn->mode) sdata_info(sdata, "regulatory prevented using AP config, downgraded\n"); @@ -3275,9 +3458,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.power_type = IEEE80211_REG_UNSET_AP; sdata->vif.bss_conf.pwr_reduction = 0; - sdata->vif.bss_conf.tx_pwr_env_num = 0; - memset(sdata->vif.bss_conf.tx_pwr_env, 0, - sizeof(sdata->vif.bss_conf.tx_pwr_env)); + ieee80211_clear_tpe(&sdata->vif.bss_conf.tpe); sdata->vif.cfg.eml_cap = 0; sdata->vif.cfg.eml_med_sync_delay = 0; @@ -5018,15 +5199,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; struct ieee80211_chan_req chanreq = {}; + struct cfg80211_chan_def ap_chandef; struct ieee802_11_elems *elems; int ret; - u32 i; lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id, - &chanreq); + &chanreq, &ap_chandef); if (IS_ERR(elems)) { rcu_read_unlock(); @@ -5042,35 +5223,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) { const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - struct ieee80211_bss_conf *bss_conf; - u8 j = 0; - - bss_conf = link->conf; if (elems->pwr_constr_elem) - bss_conf->pwr_reduction = *elems->pwr_constr_elem; - - BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) != - ARRAY_SIZE(elems->tx_pwr_env)); - - for (i = 0; i < elems->tx_pwr_env_num; i++) { - if (elems->tx_pwr_env_len[i] > sizeof(bss_conf->tx_pwr_env[j])) - continue; - - bss_conf->tx_pwr_env_num++; - memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i], - elems->tx_pwr_env_len[i]); - j++; - } + link->conf->pwr_reduction = *elems->pwr_constr_elem; he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation); if (he_6ghz_oper) - bss_conf->power_type = + link->conf->power_type = ieee80211_ap_power_type(he_6ghz_oper->control); else link_info(link, "HE 6 GHz operation missing (on %d MHz), expect issues\n", cbss->channel->center_freq); + + link->conf->tpe = elems->tpe; + ieee80211_rearrange_tpe(&link->conf->tpe, &ap_chandef, + &chanreq.oper); } rcu_read_unlock(); /* the element data was RCU protected so no longer valid anyway */ @@ -7558,6 +7726,8 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) wiphy_delayed_work_init(&link->u.mgd.chswitch_work, ieee80211_chswitch_work); + ieee80211_clear_tpe(&link->conf->tpe); + if (sdata->u.mgd.assoc_data) ether_addr_copy(link->conf->addr, sdata->u.mgd.assoc_data->link[link_id].addr); diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 55e5497f8978..6efeb977f8e5 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -187,6 +187,84 @@ ieee80211_parse_extension_element(u32 *crc, *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); } +static void ieee80211_parse_tpe(struct ieee80211_parsed_tpe *tpe, + const u8 *data, u8 len) +{ + const struct ieee80211_tx_pwr_env *env = (const void *)data; + u8 count, interpret, category; + u8 *out, N, *cnt_out = NULL, *N_out = NULL; + + if (!ieee80211_valid_tpe_element(data, len)) + return; + + count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); + interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + out = tpe->max_local[category].power; + cnt_out = &tpe->max_local[category].count; + tpe->max_local[category].valid = true; + break; + case IEEE80211_TPE_REG_CLIENT_EIRP: + out = tpe->max_reg_client[category].power; + cnt_out = &tpe->max_reg_client[category].count; + tpe->max_reg_client[category].valid = true; + break; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + out = tpe->psd_local[category].power; + cnt_out = &tpe->psd_local[category].count; + N_out = &tpe->psd_local[category].n; + tpe->psd_local[category].valid = true; + break; + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + out = tpe->psd_reg_client[category].power; + cnt_out = &tpe->psd_reg_client[category].count; + N_out = &tpe->psd_reg_client[category].n; + tpe->psd_reg_client[category].valid = true; + break; + } + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + case IEEE80211_TPE_REG_CLIENT_EIRP: + /* count was validated <= 3, plus 320 MHz */ + BUILD_BUG_ON(IEEE80211_TPE_EIRP_ENTRIES_320MHZ < 5); + memcpy(out, env->variable, count + 1); + *cnt_out = count + 1; + /* separately take 320 MHz if present */ + if (count == 3 && len > sizeof(*env) + count + 1) { + out[4] = env->variable[count + 2]; + *cnt_out = 5; + } + break; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + if (!count) { + memset(out, env->variable[0], + IEEE80211_TPE_PSD_ENTRIES_320MHZ); + *cnt_out = IEEE80211_TPE_PSD_ENTRIES_320MHZ; + break; + } + + N = 1 << (count - 1); + memcpy(out, env->variable, N); + *cnt_out = N; + *N_out = N; + + if (len > sizeof(*env) + N) { + int K = u8_get_bits(env->variable[N], + IEEE80211_TX_PWR_ENV_EXT_COUNT); + + K = min(K, IEEE80211_TPE_PSD_ENTRIES_320MHZ - N); + memcpy(out + N, env->variable + N + 1, K); + (*cnt_out) += K; + } + break; + } +} + static u32 _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, struct ieee80211_elems_parse *elems_parse, @@ -593,16 +671,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->rsnx_len = elen; break; case WLAN_EID_TX_POWER_ENVELOPE: - if (elen < 1 || - elen > sizeof(struct ieee80211_tx_pwr_env)) - break; - - if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env)) + if (params->mode < IEEE80211_CONN_MODE_HE) break; - - elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos; - elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen; - elems->tx_pwr_env_num++; + ieee80211_parse_tpe(&elems->tpe, pos, elen); break; case WLAN_EID_EXTENSION: ieee80211_parse_extension_element(calc_crc ? @@ -889,6 +960,9 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) elems->ie_start = params->start; elems->total_len = params->len; + /* set all TPE entries to unlimited (but invalid) */ + ieee80211_clear_tpe(&elems->tpe); + nontransmitted_profile = elems_parse->scratch_pos; nontransmitted_profile_len = ieee802_11_find_bssid_profile(params->start, params->len, diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile index 4fdaf3feaca3..511dfa226699 100644 --- a/net/mac80211/tests/Makefile +++ b/net/mac80211/tests/Makefile @@ -1,3 +1,3 @@ -mac80211-tests-y += module.o elems.o mfp.o +mac80211-tests-y += module.o elems.o mfp.o tpe.o obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o diff --git a/net/mac80211/tests/tpe.c b/net/mac80211/tests/tpe.c new file mode 100644 index 000000000000..dd63303a2985 --- /dev/null +++ b/net/mac80211/tests/tpe.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for TPE element handling + * + * Copyright (C) 2024 Intel Corporation + */ +#include +#include "../ieee80211_i.h" + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); + +static struct ieee80211_channel chan6g_1 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 5955, +}; + +static struct ieee80211_channel chan6g_33 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 6115, +}; + +static struct ieee80211_channel chan6g_61 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 6255, +}; + +static const struct subchan_test_case { + const char *desc; + struct cfg80211_chan_def c; + u8 n; + int expect; +} subchan_offset_cases[] = { + { + .desc = "identical 20 MHz", + .c.width = NL80211_CHAN_WIDTH_20, + .c.chan = &chan6g_1, + .c.center_freq1 = 5955, + .n = 1, + .expect = 0, + }, + { + .desc = "identical 40 MHz", + .c.width = NL80211_CHAN_WIDTH_40, + .c.chan = &chan6g_1, + .c.center_freq1 = 5965, + .n = 2, + .expect = 0, + }, + { + .desc = "identical 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_1, + .c.center_freq1 = 5985, + .c.center_freq2 = 6225, + .n = 16, + .expect = 0, + }, + { + .desc = "identical 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_1, + .c.center_freq1 = 6105, + .n = 16, + .expect = 0, + }, + { + .desc = "lower 160 MHz of 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_1, + .c.center_freq1 = 6105, + .n = 8, + .expect = 0, + }, + { + .desc = "upper 160 MHz of 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_61, + .c.center_freq1 = 6105, + .n = 8, + .expect = 8, + }, + { + .desc = "upper 160 MHz of 320 MHz, go to 40", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_61, + .c.center_freq1 = 6105, + .n = 2, + .expect = 8 + 4 + 2, + }, + { + .desc = "secondary 80 above primary in 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_1, + .c.center_freq1 = 5985, + .c.center_freq2 = 6225, + .n = 4, + .expect = 0, + }, + { + .desc = "secondary 80 below primary in 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_61, + .c.center_freq1 = 6225, + .c.center_freq2 = 5985, + .n = 4, + .expect = 4, + }, + { + .desc = "secondary 80 below primary in 80+80 MHz, go to 20", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_61, + .c.center_freq1 = 6225, + .c.center_freq2 = 5985, + .n = 1, + .expect = 7, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(subchan_offset, subchan_offset_cases, desc); + +static void subchan_offset(struct kunit *test) +{ + const struct subchan_test_case *params = test->param_value; + int offset; + + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->c), true); + + offset = ieee80211_calc_chandef_subchan_offset(¶ms->c, params->n); + + KUNIT_EXPECT_EQ(test, params->expect, offset); +} + +static const struct psd_reorder_test_case { + const char *desc; + struct cfg80211_chan_def ap, used; + struct ieee80211_parsed_tpe_psd psd, out; +} psd_reorder_cases[] = { + { + .desc = "no changes, 320 MHz", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_320, + .used.chan = &chan6g_1, + .used.center_freq1 = 6105, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 8, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 16, + .out.n = 8, + .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + }, + { + .desc = "no changes, 320 MHz, 160 MHz used, n=0", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_1, + .used.center_freq1 = 6025, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 0, + .psd.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + + .out.valid = true, + .out.count = 8, + .out.n = 0, + .out.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + }, + { + .desc = "320 MHz, HE is 80, used 160, all lower", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_1, + .used.center_freq1 = 6025, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 127, 127, 127, 127, 127, 127, 127, 127}, + }, + { + .desc = "320 MHz, HE is 80, used 160, all upper", + /* + * EHT: | | | | | | | | | | | | | | | | | + * HE: | | | | | + * used: | | | | | | | | | + */ + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_61, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_61, + .used.center_freq1 = 6185, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 12, 13, 14, 15, 0, 1, 2, 3, 127, 127, 127, 127, 127, 127, 127, 127}, + }, + { + .desc = "320 MHz, HE is 80, used 160, split", + /* + * EHT: | | | | | | | | | | | | | | | | | + * HE: | | | | | + * used: | | | | | | | | | + */ + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_33, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_33, + .used.center_freq1 = 6185, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 0, 1, 2, 3, 12, 13, 14, 15, 127, 127, 127, 127, 127, 127, 127, 127}, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(psd_reorder, psd_reorder_cases, desc); + +static void psd_reorder(struct kunit *test) +{ + const struct psd_reorder_test_case *params = test->param_value; + struct ieee80211_parsed_tpe_psd tmp = params->psd; + + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->ap), true); + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->used), true); + + ieee80211_rearrange_tpe_psd(&tmp, ¶ms->ap, ¶ms->used); + KUNIT_EXPECT_MEMEQ(test, &tmp, ¶ms->out, sizeof(tmp)); +} + +static struct kunit_case tpe_test_cases[] = { + KUNIT_CASE_PARAM(subchan_offset, subchan_offset_gen_params), + KUNIT_CASE_PARAM(psd_reorder, psd_reorder_gen_params), + {} +}; + +static struct kunit_suite tpe = { + .name = "mac80211-tpe", + .test_cases = tpe_test_cases, +}; + +kunit_test_suite(tpe); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6139c930b572..183de2075fb9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4334,3 +4334,28 @@ ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef) return IEEE80211_CONN_BW_LIMIT_20; } } + +void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe) +{ + for (int i = 0; i < 2; i++) { + tpe->max_local[i].valid = false; + memset(tpe->max_local[i].power, + IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT, + sizeof(tpe->max_local[i].power)); + + tpe->max_reg_client[i].valid = false; + memset(tpe->max_reg_client[i].power, + IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT, + sizeof(tpe->max_reg_client[i].power)); + + tpe->psd_local[i].valid = false; + memset(tpe->psd_local[i].power, + IEEE80211_TPE_PSD_NO_LIMIT, + sizeof(tpe->psd_local[i].power)); + + tpe->psd_reg_client[i].valid = false; + memset(tpe->psd_reg_client[i].power, + IEEE80211_TPE_PSD_NO_LIMIT, + sizeof(tpe->psd_reg_client[i].power)); + } +} -- cgit v1.2.3 From 5a009b42e0418d30b3ffaff2f46c534cd79b3f23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:57 +0200 Subject: wifi: mac80211: track changes in AP's TPE If the TPE (transmit power envelope) is changed, detect and report that to the driver. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506214536.103dda923f45.I990877e409ab8eade9ed7c172272e0cae57256cf@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/mlme.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a4efbfb8d796..a59eacfe0480 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -362,6 +362,7 @@ struct ieee80211_vif_chanctx_switch { * status changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed + * @BSS_CHANGED_TPE: transmit power envelope changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -398,6 +399,7 @@ enum ieee80211_bss_change { BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), + BSS_CHANGED_TPE = BIT_ULL(35), /* when adding here, make sure to change ieee80211_reconfig */ }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ed9851faac05..08c0999746fb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -973,6 +973,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, struct ieee80211_channel *channel = link->conf->chanreq.oper.chan; struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chan_req chanreq = {}; + struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; u32 vht_cap_info = 0; u16 ht_opmode; @@ -988,7 +989,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info, elems, true, &link->u.mgd.conn, - &chanreq.ap); + &ap_chandef); if (ap_mode != link->u.mgd.conn.mode) { link_info(link, @@ -998,7 +999,8 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, return -EINVAL; } - chanreq.oper = chanreq.ap; + chanreq.ap = ap_chandef; + chanreq.oper = ap_chandef; if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) chanreq.ap.chan = NULL; @@ -1026,6 +1028,16 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ieee80211_min_bw_limit_from_chandef(&chanreq.oper)) ieee80211_chandef_downgrade(&chanreq.oper, NULL); + if (ap_chandef.chan->band == NL80211_BAND_6GHZ && + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) { + ieee80211_rearrange_tpe(&elems->tpe, &ap_chandef, + &chanreq.oper); + if (memcmp(&link->conf->tpe, &elems->tpe, sizeof(elems->tpe))) { + link->conf->tpe = elems->tpe; + *changed |= BSS_CHANGED_TPE; + } + } + if (ieee80211_chanreq_identical(&chanreq, &link->conf->chanreq)) return 0; -- cgit v1.2.3 From f9a0757a4b2f5df4376963c25a3d7d7aeba78444 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Wed, 15 May 2024 11:13:18 -0700 Subject: wifi: mac80211: Add EHT UL MU-MIMO flag in ieee80211_bss_conf Add flag for Full Bandwidth UL MU-MIMO for EHT. This is utilized to pass EHT MU-MIMO configurations from user space to driver in AP mode. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Pradeep Kumar Chitrapu Link: https://msgid.link/20240515181327.12855-2-quic_pradeepc@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/cfg.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a59eacfe0480..ecfa65ade226 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -735,6 +735,9 @@ struct ieee80211_parsed_tpe { * beamformee * @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU * beamformer + * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the + * reception of an EHT TB PPDU on an RU that spans the entire PPDU + * bandwidth */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -828,6 +831,7 @@ struct ieee80211_bss_conf { bool eht_su_beamformer; bool eht_su_beamformee; bool eht_mu_beamformer; + bool eht_80mhz_full_bw_ul_mumimo; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3319c9360507..62119e957cd8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1379,6 +1379,11 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); + link_conf->eht_80mhz_full_bw_ul_mumimo = + params->eht_cap->fixed.phy_cap_info[7] & + (IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ); } else { link_conf->eht_su_beamformer = false; link_conf->eht_su_beamformee = false; -- cgit v1.2.3 From aa4ec06c455d0200eea0a4361cc58eb5b8bf68c4 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 17 May 2024 18:33:31 +0300 Subject: wifi: cfg80211: use __counted_by where appropriate Annotate 'sub_specs' of 'struct cfg80211_sar_specs', 'channels' of 'struct cfg80211_sched_scan_request', 'channels' of 'struct cfg80211_wowlan_nd_match', and 'matches' of 'struct cfg80211_wowlan_nd_info' with '__counted_by' attribute. Briefly tested with clang 18.1.1 and CONFIG_UBSAN_BOUNDS running iwlwifi. Signed-off-by: Dmitry Antipov Link: https://msgid.link/20240517153332.18271-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cbf1664dc569..d79180bec7a1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2200,7 +2200,7 @@ struct cfg80211_sar_sub_specs { struct cfg80211_sar_specs { enum nl80211_sar_type type; u32 num_sub_specs; - struct cfg80211_sar_sub_specs sub_specs[]; + struct cfg80211_sar_sub_specs sub_specs[] __counted_by(num_sub_specs); }; @@ -2838,7 +2838,7 @@ struct cfg80211_sched_scan_request { struct list_head list; /* keep last */ - struct ieee80211_channel *channels[]; + struct ieee80211_channel *channels[] __counted_by(n_channels); }; /** @@ -3582,7 +3582,7 @@ struct cfg80211_coalesce { struct cfg80211_wowlan_nd_match { struct cfg80211_ssid ssid; int n_channels; - u32 channels[]; + u32 channels[] __counted_by(n_channels); }; /** @@ -3596,7 +3596,7 @@ struct cfg80211_wowlan_nd_match { */ struct cfg80211_wowlan_nd_info { int n_matches; - struct cfg80211_wowlan_nd_match *matches[]; + struct cfg80211_wowlan_nd_match *matches[] __counted_by(n_matches); }; /** -- cgit v1.2.3 From 4d25ca2d6801cfcf26f7f39c561611ba5be99bf8 Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:32 -0700 Subject: net: Rename mono_delivery_time to tstamp_type for scalabilty mono_delivery_time was added to check if skb->tstamp has delivery time in mono clock base (i.e. EDT) otherwise skb->tstamp has timestamp in ingress and delivery_time at egress. Renaming the bitfield from mono_delivery_time to tstamp_type is for extensibilty for other timestamps such as userspace timestamp (i.e. SO_TXTIME) set via sock opts. As we are renaming the mono_delivery_time to tstamp_type, it makes sense to start assigning tstamp_type based on enum defined in this commit. Earlier we used bool arg flag to check if the tstamp is mono in function skb_set_delivery_time, Now the signature of the functions accepts tstamp_type to distinguish between mono and real time. Also skb_set_delivery_type_by_clockid is a new function which accepts clockid to determine the tstamp_type. In future tstamp_type:1 can be extended to support userspace timestamp by increasing the bitfield. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-2-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 52 ++++++++++++++++++++++-------- include/net/inet_frag.h | 4 +-- net/bridge/netfilter/nf_conntrack_bridge.c | 6 ++-- net/core/dev.c | 2 +- net/core/filter.c | 10 +++--- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 9 +++--- net/ipv4/tcp_output.c | 14 ++++---- net/ipv6/ip6_output.c | 6 ++-- net/ipv6/netfilter.c | 6 ++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/sched/act_bpf.c | 4 +-- net/sched/cls_bpf.c | 4 +-- 17 files changed, 78 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b97c93a6de..3a721cc3b644 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -706,6 +706,11 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +enum skb_tstamp_type { + SKB_CLOCK_REALTIME, + SKB_CLOCK_MONOTONIC, +}; + /** * DOC: Basic sk_buff geometry * @@ -823,10 +828,8 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required - * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * @tstamp_type: When set, skb->tstamp has the + * delivery_time clock base of skb->tstamp. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -954,7 +957,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ + __u8 tstamp_type:1; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -4183,7 +4186,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) @@ -4192,10 +4195,33 @@ static inline ktime_t net_timedelta(ktime_t t) } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, - bool mono) + u8 tstamp_type) { skb->tstamp = kt; - skb->mono_delivery_time = kt && mono; + + if (kt) + skb->tstamp_type = tstamp_type; + else + skb->tstamp_type = SKB_CLOCK_REALTIME; +} + +static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, + ktime_t kt, clockid_t clockid) +{ + u8 tstamp_type = SKB_CLOCK_REALTIME; + + switch (clockid) { + case CLOCK_REALTIME: + break; + case CLOCK_MONOTONIC: + tstamp_type = SKB_CLOCK_MONOTONIC; + break; + default: + WARN_ON_ONCE(1); + kt = 0; + } + + skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); @@ -4205,8 +4231,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { - if (skb->mono_delivery_time) { - skb->mono_delivery_time = 0; + if (skb->tstamp_type) { + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else @@ -4216,7 +4242,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb) static inline void skb_clear_tstamp(struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return; skb->tstamp = 0; @@ -4224,7 +4250,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) static inline ktime_t skb_tstamp(const struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return 0; return skb->tstamp; @@ -4232,7 +4258,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { - if (!skb->mono_delivery_time && skb->tstamp) + if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 153960663ce4..5af6eb14c5db 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -76,7 +76,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far - * @mono_delivery_time: stamp has a mono delivery time (EDT) + * @tstamp_type: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -97,7 +97,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; - u8 mono_delivery_time; + u8 tstamp_type; __u8 flags; u16 max_size; struct fqdir *fqdir; diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index c3c51b9a6826..816bb0fde718 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -32,7 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; unsigned int hlen, ll_rs, mtu; ktime_t tstamp = skb->tstamp; struct ip_frag_state state; @@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, if (iter.frag) ip_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -113,7 +113,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/core/dev.c b/net/core/dev.c index e1bb6d7856d9..85fe8138f3e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2160,7 +2160,7 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); } diff --git a/net/core/filter.c b/net/core/filter.c index 2510464692af..a3781a796da4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7730,13 +7730,13 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, if (!tstamp) return -EINVAL; skb->tstamp = tstamp; - skb->mono_delivery_time = 1; + skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; case BPF_SKB_TSTAMP_UNSPEC: if (tstamp) return -EINVAL; skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; break; default: return -EINVAL; @@ -9443,7 +9443,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); - /* skb->tc_at_ingress && skb->mono_delivery_time, + /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ *insn++ = BPF_MOV64_IMM(value_reg, 0); @@ -9468,7 +9468,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, * the bpf prog is aware the tstamp could have delivery time. * Thus, write skb->tstamp as is if tstamp_type_access is true. * Otherwise, writing at ingress will have to clear the - * mono_delivery_time bit also. + * skb->tstamp_type bit also. */ if (!prog->tstamp_type_access) { __u8 tmp_reg = BPF_REG_AX; @@ -9478,7 +9478,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); /* goto */ *insn++ = BPF_JMP_A(2); - /* : mono_delivery_time */ + /* : skb->tstamp_type */ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 56ef873828f4..867d637d86f0 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -130,7 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, goto err; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index faaec92a46ac..d179a2c84222 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -619,7 +619,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, skb_mark_not_on_list(head); head->prev = NULL; head->tstamp = q->stamp; - head->mono_delivery_time = q->mono_delivery_time; + head->tstamp_type = q->tstamp_type; if (sk) refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 08e2c92e25ab..a92664a5ef2e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -355,7 +355,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->iif = dev->ifindex; qp->q.stamp = skb->tstamp; - qp->q.mono_delivery_time = skb->mono_delivery_time; + qp->q.tstamp_type = skb->tstamp_type; qp->q.meat += skb->len; qp->ecn |= ecn; add_frag_mem_limit(qp->q.fqdir, skb->truesize); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9500031a1f55..fe86cadfa85b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -764,7 +764,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; struct sk_buff *skb2; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct rtable *rt = skb_rtable(skb); unsigned int mtu, hlen, ll_rs; struct ip_fraglist_iter iter; @@ -856,7 +856,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, } } - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) @@ -912,7 +912,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, skb2); if (err) goto fail; @@ -1649,7 +1649,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - nskb->mono_delivery_time = !!transmit_time; + if (transmit_time) + nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); ip_push_pending_frames(sk, &fl4); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 95caf8aaa8be..d44371cfa6ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1301,7 +1301,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp = tcp_sk(sk); prior_wstamp = tp->tcp_wstamp_ns; tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); if (clone_it) { oskb = skb; @@ -1655,7 +1655,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, skb_split(skb, buff, len); - skb_set_delivery_time(buff, skb->tstamp, true); + skb_set_delivery_time(buff, skb->tstamp, SKB_CLOCK_MONOTONIC); tcp_fragment_tstamp(skb, buff); old_factor = tcp_skb_pcount(skb); @@ -2764,7 +2764,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ tp->tcp_wstamp_ns = tp->tcp_clock_cache; - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); goto repair; /* Skip network transmission */ @@ -3752,11 +3752,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) skb_set_delivery_time(skb, cookie_init_timestamp(req, now), - true); + SKB_CLOCK_MONOTONIC); else #endif { - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3843,7 +3843,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, synack_type, &opts); - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); tcp_add_tx_delay(skb, tp); return skb; @@ -4027,7 +4027,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true); + skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, SKB_CLOCK_MONOTONIC); /* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..1ab0f23d37bf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -859,7 +859,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; ktime_t tstamp = skb->tstamp; @@ -955,7 +955,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), @@ -1016,7 +1016,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - skb_set_delivery_time(frag, tstamp, mono_delivery_time); + skb_set_delivery_time(frag, tstamp, tstamp_type); err = output(net, sk, frag); if (err) goto fail; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 53d255838e6a..e0c2347b4dc6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -126,7 +126,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; @@ -192,7 +192,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -225,7 +225,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5e1b50c6a44d..6f0844c9315d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; if (payload_len > fq->q.max_size) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 327caca64257..a48be617a8ab 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.fqdir, skb->truesize); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4c3605485b68..8333005c5c2e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -975,7 +975,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 mark = inet_twsk(sk)->tw_mark; else mark = READ_ONCE(sk->sk_mark); - skb_set_delivery_time(buff, tcp_transmit_time(sk), true); + skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); } if (txhash) { /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 0e3cf11ae5fc..396b576390d0 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -54,8 +54,8 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5e83e890f6a4..1941ebec23ff 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -104,8 +104,8 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(prog->filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (prog->exts_integrated) { res->class = 0; -- cgit v1.2.3 From 1693c5db6ab8262e6f5263f9d211855959aa5acd Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:33 -0700 Subject: net: Add additional bit to support clockid_t timestamp type tstamp_type is now set based on actual clockid_t compressed into 2 bits. To make the design scalable for future needs this commit bring in the change to extend the tstamp_type:1 to tstamp_type:2 to support other clockid_t timestamp. We now support CLOCK_TAI as part of tstamp_type as part of this commit with existing support CLOCK_MONOTONIC and CLOCK_REALTIME. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 18 +++++++++++------ include/uapi/linux/bpf.h | 15 +++++++++----- net/core/filter.c | 46 ++++++++++++++++++++++++------------------ net/ipv4/ip_output.c | 5 ++++- net/ipv4/raw.c | 2 +- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/ip6_output.c | 5 ++++- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 10 +++++++-- net/packet/af_packet.c | 7 +++---- tools/include/uapi/linux/bpf.h | 15 +++++++++----- 11 files changed, 81 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a721cc3b644..1e5c97daaa37 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t; enum skb_tstamp_type { SKB_CLOCK_REALTIME, SKB_CLOCK_MONOTONIC, + SKB_CLOCK_TAI, + __SKB_CLOCK_MAX = SKB_CLOCK_TAI, }; /** @@ -957,7 +959,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 tstamp_type:1; /* See skb_tstamp_type */ + __u8 tstamp_type:2; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -1087,15 +1089,16 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move tc_at_ingress or mono_delivery_time +/* if you move tc_at_ingress or tstamp_type * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) -#define TC_AT_INGRESS_MASK (1 << 6) +#define SKB_TSTAMP_TYPE_MASK (3 << 6) +#define SKB_TSTAMP_TYPE_RSHIFT (6) +#define TC_AT_INGRESS_MASK (1 << 5) #else -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) -#define TC_AT_INGRESS_MASK (1 << 1) +#define SKB_TSTAMP_TYPE_MASK (3) +#define TC_AT_INGRESS_MASK (1 << 2) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) @@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, case CLOCK_MONOTONIC: tstamp_type = SKB_CLOCK_MONOTONIC; break; + case CLOCK_TAI: + tstamp_type = SKB_CLOCK_TAI; + break; default: WARN_ON_ONCE(1); kt = 0; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; diff --git a/net/core/filter.c b/net/core/filter.c index a3781a796da4..c6edfe9f41bc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, return -EOPNOTSUPP; switch (tstamp_type) { - case BPF_SKB_TSTAMP_DELIVERY_MONO: + case BPF_SKB_CLOCK_REALTIME: + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_REALTIME; + break; + case BPF_SKB_CLOCK_MONOTONIC: if (!tstamp) return -EINVAL; skb->tstamp = tstamp; skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; - case BPF_SKB_TSTAMP_UNSPEC: - if (tstamp) + case BPF_SKB_CLOCK_TAI: + if (!tstamp) return -EINVAL; - skb->tstamp = 0; - skb->tstamp_type = SKB_CLOCK_REALTIME; + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_TAI; break; default: return -EINVAL; @@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, { __u8 value_reg = si->dst_reg; __u8 skb_reg = si->src_reg; - /* AX is needed because src_reg and dst_reg could be the same */ - __u8 tmp_reg = BPF_REG_AX; - - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, - SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, - SKB_MONO_DELIVERY_TIME_MASK, 2); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); - *insn++ = BPF_JMP_A(1); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO); + BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI); + BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME); + BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC); + BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI); + *insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT); +#else + BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1)); +#endif return insn; } @@ -9439,10 +9444,11 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); - *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); + /* check if ingress mask bits is set */ + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); + *insn++ = BPF_JMP_A(4); + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1); + *insn++ = BPF_JMP_A(2); /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ @@ -9479,7 +9485,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, /* goto */ *insn++ = BPF_JMP_A(2); /* : skb->tstamp_type */ - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fe86cadfa85b..b90d0f78ac80 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; - skb->tstamp = cork->transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid); /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4cb43401e0e0..1a0953650356 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30ef0c8f5e92..8f70b8d1d1e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void) */ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + sk->sk_clockid = CLOCK_MONOTONIC; + per_cpu(ipv4_tcp_sk, cpu) = sk; } if (register_pernet_subsys(&tcp_sk_ops)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1ab0f23d37bf..e7a19df3125e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; - skb->tstamp = cork->base.transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid); ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2eedf255600b..f838366e8256 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->protocol = htons(ETH_P_IPV6); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_put(skb, length); skb_reset_network_header(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8333005c5c2e..750aa681779c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = { static int __net_init tcpv6_net_init(struct net *net) { - return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, - SOCK_RAW, IPPROTO_TCP, net); + int res; + + res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, + SOCK_RAW, IPPROTO_TCP, net); + if (!res) + net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; + + return res; } static void __net_exit tcpv6_net_exit(struct net *net) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea3ebc160e25..fce390887591 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2056,8 +2056,7 @@ retry: skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); - skb->tstamp = sockc.transmit_time; - + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; - skb->tstamp = sockc.transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); if (unlikely(extra_len == 4)) skb->no_fcs = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; -- cgit v1.2.3 From 2c1713a8f1c94033a6e00aae4693ab03e8a3b9f1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 18 May 2024 16:58:47 +0200 Subject: bpf: constify member bpf_sysctl_kern:: Table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers, for which bpf_sysctl_kern::table is also used. As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch this utility type which is not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. Signed-off-by: Thomas Weißschuh Signed-off-by: Daniel Borkmann Reviewed-by: Joel Granados Link: https://lore.kernel.org/bpf/20240518-sysctl-const-handler-bpf-v1-1-f0d7186743c1@weissschuh.net --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0f12cf01070e..b02aea291b7e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1406,7 +1406,7 @@ struct bpf_sock_ops_kern { struct bpf_sysctl_kern { struct ctl_table_header *head; - struct ctl_table *table; + const struct ctl_table *table; void *cur_val; size_t cur_len; void *new_val; -- cgit v1.2.3 From 8526f8c877baf3f9e678b31fd7d1066b776775cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:02:13 +0200 Subject: wifi: nl80211: clean up coalescing rule handling There's no need to allocate a tiny struct and then an array again, just allocate the two together and use __counted_by(). Also unify the freeing. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240523120213.48a40cfb96f9.Ia02bf8f8fefbf533c64c5fa26175848d4a3a7899@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- net/wireless/core.c | 3 ++- net/wireless/nl80211.c | 47 ++++++++++++++++------------------------------- net/wireless/nl80211.h | 4 ++-- 4 files changed, 21 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d79180bec7a1..5da9bb0ac6a4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3566,8 +3566,8 @@ struct cfg80211_coalesce_rules { * @n_rules: number of rules */ struct cfg80211_coalesce { - struct cfg80211_coalesce_rules *rules; int n_rules; + struct cfg80211_coalesce_rules rules[] __counted_by(n_rules); }; /** diff --git a/net/wireless/core.c b/net/wireless/core.c index 61f7cd8a8e9c..7c35349b9596 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1145,7 +1145,8 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->background_cac_abort_wk); cfg80211_rdev_free_wowlan(rdev); - cfg80211_rdev_free_coalesce(rdev); + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = NULL; } EXPORT_SYMBOL(wiphy_unregister); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6ba988a6f5a2..8ff5f79d446a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13897,9 +13897,8 @@ nla_put_failure: return -ENOBUFS; } -void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev) +void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce) { - struct cfg80211_coalesce *coalesce = rdev->coalesce; int i, j; struct cfg80211_coalesce_rules *rule; @@ -13908,13 +13907,13 @@ void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev) for (i = 0; i < coalesce->n_rules; i++) { rule = &coalesce->rules[i]; + if (!rule) + continue; for (j = 0; j < rule->n_patterns; j++) kfree(rule->patterns[j].mask); kfree(rule->patterns); } - kfree(coalesce->rules); kfree(coalesce); - rdev->coalesce = NULL; } static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, @@ -14012,17 +14011,16 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce; - struct cfg80211_coalesce new_coalesce = {}; - struct cfg80211_coalesce *n_coalesce; - int err, rem_rule, n_rules = 0, i, j; + struct cfg80211_coalesce *new_coalesce; + int err, rem_rule, n_rules = 0, i; struct nlattr *rule; - struct cfg80211_coalesce_rules *tmp_rule; if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) { - cfg80211_rdev_free_coalesce(rdev); + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = NULL; rdev_set_coalesce(rdev, NULL); return 0; } @@ -14033,47 +14031,34 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) if (n_rules > coalesce->n_rules) return -EINVAL; - new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]), - GFP_KERNEL); - if (!new_coalesce.rules) + new_coalesce = kzalloc(struct_size(new_coalesce, rules, n_rules), + GFP_KERNEL); + if (!new_coalesce) return -ENOMEM; - new_coalesce.n_rules = n_rules; + new_coalesce->n_rules = n_rules; i = 0; nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE], rem_rule) { err = nl80211_parse_coalesce_rule(rdev, rule, - &new_coalesce.rules[i]); + &new_coalesce->rules[i]); if (err) goto error; i++; } - err = rdev_set_coalesce(rdev, &new_coalesce); + err = rdev_set_coalesce(rdev, new_coalesce); if (err) goto error; - n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL); - if (!n_coalesce) { - err = -ENOMEM; - goto error; - } - cfg80211_rdev_free_coalesce(rdev); - rdev->coalesce = n_coalesce; + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = new_coalesce; return 0; error: - for (i = 0; i < new_coalesce.n_rules; i++) { - tmp_rule = &new_coalesce.rules[i]; - if (!tmp_rule) - continue; - for (j = 0; j < tmp_rule->n_patterns; j++) - kfree(tmp_rule->patterns[j].mask); - kfree(tmp_rule->patterns); - } - kfree(new_coalesce.rules); + cfg80211_free_coalesce(new_coalesce); return err; } diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 6376f3a87f8a..ffaab9a92e5b 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file - * Copyright (C) 2018, 2020-2022 Intel Corporation + * Copyright (C) 2018, 2020-2024 Intel Corporation */ #ifndef __NET_WIRELESS_NL80211_H #define __NET_WIRELESS_NL80211_H @@ -119,7 +119,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id); -void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev); +void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce); /* peer measurement */ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From 5e514f1cba090e1c8fff03e92a175eccfe46305f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 12:52:50 +0000 Subject: tcp: add tcp_done_with_error() helper tcp_reset() ends with a sequence that is carefuly ordered. We need to fix [e]poll bugs in the following patches, it makes sense to use a common helper. Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240528125253.1966136-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 32 +++++++++++++++++++++----------- 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 060e95b331a2..32815a40dea1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -677,6 +677,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); +void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 681b54e1f3a6..2a8f8d8676ff 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -598,7 +598,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask |= EPOLLOUT | EPOLLWRNORM; } - /* This barrier is coupled with smp_wmb() in tcp_reset() */ + /* This barrier is coupled with smp_wmb() in tcp_done_with_error() */ smp_rmb(); if (READ_ONCE(sk->sk_err) || !skb_queue_empty_lockless(&sk->sk_error_queue)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c04a9c8be9d..5aadf64e554d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4436,9 +4436,26 @@ static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp, return SKB_NOT_DROPPED_YET; } + +void tcp_done_with_error(struct sock *sk, int err) +{ + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + WRITE_ONCE(sk->sk_err, err); + smp_wmb(); + + tcp_write_queue_purge(sk); + tcp_done(sk); + + if (!sock_flag(sk, SOCK_DEAD)) + sk_error_report(sk); +} +EXPORT_SYMBOL(tcp_done_with_error); + /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) { + int err; + trace_tcp_receive_reset(sk); /* mptcp can't tell us to ignore reset pkts, @@ -4450,24 +4467,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - WRITE_ONCE(sk->sk_err, ECONNREFUSED); + err = ECONNREFUSED; break; case TCP_CLOSE_WAIT: - WRITE_ONCE(sk->sk_err, EPIPE); + err = EPIPE; break; case TCP_CLOSE: return; default: - WRITE_ONCE(sk->sk_err, ECONNRESET); + err = ECONNRESET; } - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - - tcp_write_queue_purge(sk); - tcp_done(sk); - - if (!sock_flag(sk, SOCK_DEAD)) - sk_error_report(sk); + tcp_done_with_error(sk, err); } /* -- cgit v1.2.3 From bbb31b7ae14594aa2a7e74923ee38f312404ad66 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 28 May 2024 16:05:09 +0100 Subject: net: dsa: remove mac_prepare()/mac_finish() shims No DSA driver makes use of the mac_prepare()/mac_finish() shimmed operations anymore, so we can remove these. Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/E1sByNx-00ELW1-Vp@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 6 ------ net/dsa/dsa.c | 2 -- net/dsa/port.c | 32 -------------------------------- 3 files changed, 40 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b60e7e410aba..f9ae3ca66b6f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -882,15 +882,9 @@ struct dsa_switch_ops { struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, int port, phy_interface_t iface); - int (*phylink_mac_prepare)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_config)(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); - int (*phylink_mac_finish)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 12521a7d4048..668c729946ea 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1507,9 +1507,7 @@ static int dsa_switch_probe(struct dsa_switch *ds) if (ds->phylink_mac_ops) { if (ds->ops->phylink_mac_select_pcs || - ds->ops->phylink_mac_prepare || ds->ops->phylink_mac_config || - ds->ops->phylink_mac_finish || ds->ops->phylink_mac_link_down || ds->ops->phylink_mac_link_up) return -EINVAL; diff --git a/net/dsa/port.c b/net/dsa/port.c index 9a249d4ac3a5..e23db9507546 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1549,21 +1549,6 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config, return pcs; } -static int dsa_port_phylink_mac_prepare(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - int err = 0; - - if (ds->ops->phylink_mac_prepare) - err = ds->ops->phylink_mac_prepare(ds, dp->index, mode, - interface); - - return err; -} - static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) @@ -1577,21 +1562,6 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config, ds->ops->phylink_mac_config(ds, dp->index, mode, state); } -static int dsa_port_phylink_mac_finish(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - int err = 0; - - if (ds->ops->phylink_mac_finish) - err = ds->ops->phylink_mac_finish(ds, dp->index, mode, - interface); - - return err; -} - static void dsa_port_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) @@ -1624,9 +1594,7 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { .mac_select_pcs = dsa_port_phylink_mac_select_pcs, - .mac_prepare = dsa_port_phylink_mac_prepare, .mac_config = dsa_port_phylink_mac_config, - .mac_finish = dsa_port_phylink_mac_finish, .mac_link_down = dsa_port_phylink_mac_link_down, .mac_link_up = dsa_port_phylink_mac_link_up, }; -- cgit v1.2.3 From 73287fe228721b05690e671adbcccc6cf5435be6 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:39 -0700 Subject: bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops. Pass an additional pointer of bpf_struct_ops_link to callback function reg, unreg, and update provided by subsystems defined in bpf_struct_ops. A bpf_struct_ops_map can be registered for multiple links. Passing a pointer of bpf_struct_ops_link helps subsystems to distinguish them. This pointer will be used in the later patches to let the subsystem initiate a detachment on a link that was registered to it previously. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-2-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 +++--- kernel/bpf/bpf_struct_ops.c | 10 +++++----- net/bpf/bpf_dummy_struct_ops.c | 4 ++-- net/ipv4/bpf_tcp_ca.c | 6 +++--- tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c | 4 ++-- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e694a308081..19f8836382fc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1730,9 +1730,9 @@ struct bpf_struct_ops { int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - int (*update)(void *kdata, void *old_kdata); + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); void *cfi_stubs; struct module *owner; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 86c7884abaf8..1542dded7489 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -757,7 +757,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - err = st_ops->reg(kdata); + err = st_ops->reg(kdata, NULL); if (likely(!err)) { /* This refcnt increment on the map here after * 'st_ops->reg()' is secure since the state of the @@ -805,7 +805,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) BPF_STRUCT_OPS_STATE_TOBEFREE); switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL); bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: @@ -1060,7 +1060,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) /* st_link->map can be NULL if * bpf_struct_ops_link_create() fails to register. */ - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); bpf_map_put(&st_map->map); } kfree(st_link); @@ -1125,7 +1125,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map goto err_out; } - err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link); if (err) goto err_out; @@ -1176,7 +1176,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; - err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 891cdf61c65a..3ea52b05adfb 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -272,12 +272,12 @@ static int bpf_dummy_init_member(const struct btf_type *t, return -EOPNOTSUPP; } -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { return -EOPNOTSUPP; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 18227757ec0c..3f88d0961e5b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -260,17 +260,17 @@ static int bpf_tcp_ca_check_member(const struct btf_type *t, return 0; } -static int bpf_tcp_ca_reg(void *kdata) +static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link) { return tcp_register_congestion_control(kdata); } -static void bpf_tcp_ca_unreg(void *kdata) +static void bpf_tcp_ca_unreg(void *kdata, struct bpf_link *link) { tcp_unregister_congestion_control(kdata); } -static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +static int bpf_tcp_ca_update(void *kdata, void *old_kdata, struct bpf_link *link) { return tcp_update_congestion_control(kdata, old_kdata); } diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c index b1dd889d5d7d..948eb3962732 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t, return 0; } -static int dummy_reg(void *kdata) +static int dummy_reg(void *kdata, struct bpf_link *link) { return 0; } -static void dummy_unreg(void *kdata) +static void dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2a18bd320e92..0a09732cde4b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -820,7 +820,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -835,7 +835,7 @@ static int bpf_dummy_reg(void *kdata) return 0; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } @@ -871,7 +871,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; -static int bpf_dummy_reg2(void *kdata) +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops2 *ops = kdata; -- cgit v1.2.3 From 1adddc97aa44c8783f9f0276ea70854d56f9f6df Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:41 -0700 Subject: bpf: support epoll from bpf struct_ops links. Add epoll support to bpf struct_ops links to trigger EPOLLHUP event upon detachment. This patch implements the "poll" of the "struct file_operations" for BPF links and introduces a new "poll" operator in the "struct bpf_link_ops". By implementing "poll" of "struct bpf_link_ops" for the links of struct_ops, the file descriptor of a struct_ops link can be added to an epoll file descriptor to receive EPOLLHUP events. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-4-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 + kernel/bpf/bpf_struct_ops.c | 17 +++++++++++++++++ kernel/bpf/syscall.c | 31 ++++++++++++++++++++++++++----- 3 files changed, 44 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 19f8836382fc..5eb61120e4f5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1612,6 +1612,7 @@ struct bpf_link_ops { struct bpf_link_info *info); int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, struct bpf_map *old_map); + __poll_t (*poll)(struct file *file, struct poll_table_struct *pts); }; struct bpf_tramp_link { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 58314b1fc39c..a2cf31b14be4 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -12,6 +12,7 @@ #include #include #include +#include struct bpf_struct_ops_value { struct bpf_struct_ops_common_value common; @@ -56,6 +57,7 @@ struct bpf_struct_ops_map { struct bpf_struct_ops_link { struct bpf_link link; struct bpf_map __rcu *map; + wait_queue_head_t wait_hup; }; static DEFINE_MUTEX(update_mutex); @@ -1167,15 +1169,28 @@ static int bpf_struct_ops_map_link_detach(struct bpf_link *link) mutex_unlock(&update_mutex); + wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP); + return 0; } +static __poll_t bpf_struct_ops_map_link_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct bpf_struct_ops_link *st_link = file->private_data; + + poll_wait(file, &st_link->wait_hup, pts); + + return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP; +} + static const struct bpf_link_ops bpf_struct_ops_map_lops = { .dealloc = bpf_struct_ops_map_link_dealloc, .detach = bpf_struct_ops_map_link_detach, .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, .fill_link_info = bpf_struct_ops_map_link_fill_link_info, .update_map = bpf_struct_ops_map_link_update, + .poll = bpf_struct_ops_map_link_poll, }; int bpf_struct_ops_link_create(union bpf_attr *attr) @@ -1208,6 +1223,8 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; + init_waitqueue_head(&link->wait_hup); + /* Hold the update_mutex such that the subsystem cannot * do link->ops->detach() before the link is fully initialized. */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2222c3ff88e7..81efa1944942 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3150,6 +3150,13 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) } #endif +static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) +{ + struct bpf_link *link = file->private_data; + + return link->ops->poll(file, pts); +} + static const struct file_operations bpf_link_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_link_show_fdinfo, @@ -3159,6 +3166,16 @@ static const struct file_operations bpf_link_fops = { .write = bpf_dummy_write, }; +static const struct file_operations bpf_link_fops_poll = { +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_link_show_fdinfo, +#endif + .release = bpf_link_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, + .poll = bpf_link_poll, +}; + static int bpf_link_alloc_id(struct bpf_link *link) { int id; @@ -3201,7 +3218,9 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) return id; } - file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); + file = anon_inode_getfile("bpf_link", + link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, + link, O_CLOEXEC); if (IS_ERR(file)) { bpf_link_free_id(id); put_unused_fd(fd); @@ -3229,7 +3248,9 @@ int bpf_link_settle(struct bpf_link_primer *primer) int bpf_link_new_fd(struct bpf_link *link) { - return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); + return anon_inode_getfd("bpf-link", + link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, + link, O_CLOEXEC); } struct bpf_link *bpf_link_get_from_fd(u32 ufd) @@ -3239,7 +3260,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) if (!f.file) return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_link_fops) { + if (f.file->f_op != &bpf_link_fops && f.file->f_op != &bpf_link_fops_poll) { fdput(f); return ERR_PTR(-EINVAL); } @@ -4971,7 +4992,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, uattr); else if (f.file->f_op == &btf_fops) err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); - else if (f.file->f_op == &bpf_link_fops) + else if (f.file->f_op == &bpf_link_fops || f.file->f_op == &bpf_link_fops_poll) err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else @@ -5106,7 +5127,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (!file) return -EBADF; - if (file->f_op == &bpf_link_fops) { + if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { struct bpf_link *link = file->private_data; if (link->ops == &bpf_raw_tp_link_lops) { -- cgit v1.2.3 From 67c3e8353f45c27800eecc46e00e8272f063f7d1 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:42 -0700 Subject: bpf: export bpf_link_inc_not_zero. bpf_link_inc_not_zero() will be used by kernel modules. We will use it in bpf_testmod.c later. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-5-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 ++++++ kernel/bpf/syscall.c | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5eb61120e4f5..a834f4b761bc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2334,6 +2334,7 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); @@ -2705,6 +2706,11 @@ static inline void bpf_link_inc(struct bpf_link *link) { } +static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +{ + return NULL; +} + static inline void bpf_link_put(struct bpf_link *link) { } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 81efa1944942..5070fa20d05c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5437,10 +5437,11 @@ static int link_detach(union bpf_attr *attr) return ret; } -static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) { return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); } +EXPORT_SYMBOL(bpf_link_inc_not_zero); struct bpf_link *bpf_link_by_id(u32 id) { -- cgit v1.2.3 From a79d8fe2ff8e78e549dc86cc853a61b029404871 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 May 2024 12:09:08 +0800 Subject: ipv6: sr: restruct ifdefines There are too many ifdef in IPv6 segment routing code that may cause logic problems. like commit 160e9d275218 ("ipv6: sr: fix invalid unregister error path"). To avoid this, the init functions are redefined for both cases. The code could be more clear after all fidefs are removed. Suggested-by: Simon Horman Suggested-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240529040908.3472952-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- include/net/seg6.h | 7 +++++++ include/net/seg6_hmac.h | 7 +++++++ net/ipv6/seg6.c | 33 +++++---------------------------- 3 files changed, 19 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index af668f17b398..82b3fbbcbb93 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -52,10 +52,17 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net) extern int seg6_init(void); extern void seg6_exit(void); +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); extern int seg6_local_init(void); extern void seg6_local_exit(void); +#else +static inline int seg6_iptunnel_init(void) { return 0; } +static inline void seg6_iptunnel_exit(void) {} +static inline int seg6_local_init(void) { return 0; } +static inline void seg6_local_exit(void) {} +#endif extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 2b5d2ee5613e..24f733b3e3fe 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -49,9 +49,16 @@ extern int seg6_hmac_info_del(struct net *net, u32 key); extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, struct ipv6_sr_hdr *srh); extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +#ifdef CONFIG_IPV6_SEG6_HMAC extern int seg6_hmac_init(void); extern void seg6_hmac_exit(void); extern int seg6_hmac_net_init(struct net *net); extern void seg6_hmac_net_exit(struct net *net); +#else +static inline int seg6_hmac_init(void) { return 0; } +static inline void seg6_hmac_exit(void) {} +static inline int seg6_hmac_net_init(struct net *net) { return 0; } +static inline void seg6_hmac_net_exit(struct net *net) {} +#endif #endif diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index a31521e270f7..180da19c148c 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -21,9 +21,7 @@ #include #include #include -#ifdef CONFIG_IPV6_SEG6_HMAC #include -#endif bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced) { @@ -437,13 +435,11 @@ static int __net_init seg6_net_init(struct net *net) net->ipv6.seg6_data = sdata; -#ifdef CONFIG_IPV6_SEG6_HMAC if (seg6_hmac_net_init(net)) { kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); return -ENOMEM; } -#endif return 0; } @@ -452,9 +448,7 @@ static void __net_exit seg6_net_exit(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); -#ifdef CONFIG_IPV6_SEG6_HMAC seg6_hmac_net_exit(net); -#endif kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); @@ -520,41 +514,28 @@ int __init seg6_init(void) if (err) goto out_unregister_pernet; -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) goto out_unregister_genl; err = seg6_local_init(); - if (err) { - seg6_iptunnel_exit(); - goto out_unregister_genl; - } -#endif + if (err) + goto out_unregister_iptun; -#ifdef CONFIG_IPV6_SEG6_HMAC err = seg6_hmac_init(); if (err) - goto out_unregister_iptun; -#endif + goto out_unregister_seg6; pr_info("Segment Routing with IPv6\n"); out: return err; -#ifdef CONFIG_IPV6_SEG6_HMAC -out_unregister_iptun: -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL +out_unregister_seg6: seg6_local_exit(); +out_unregister_iptun: seg6_iptunnel_exit(); -#endif -#endif -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL out_unregister_genl: -#endif -#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC) genl_unregister_family(&seg6_genl_family); -#endif out_unregister_pernet: unregister_pernet_subsys(&ip6_segments_ops); goto out; @@ -562,13 +543,9 @@ out_unregister_pernet: void seg6_exit(void) { -#ifdef CONFIG_IPV6_SEG6_HMAC seg6_hmac_exit(); -#endif -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL seg6_local_exit(); seg6_iptunnel_exit(); -#endif genl_unregister_family(&seg6_genl_family); unregister_pernet_subsys(&ip6_segments_ops); } -- cgit v1.2.3 From 02d00dc73d8d0613f82063ed53d67912a422c21e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 May 2024 14:29:29 +0100 Subject: net: phylink: rename ovr_an_inband to default_an_inband Since ovr_an_inband no longer overrides every MLO_AN_xxx mode, rename it to reflect what it now does - it changes the default mode from MLO_AN_PHY to MLO_AN_INBAND. Fix up the two users of this. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/E1sCJMv-00Ecr1-Sk@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_memac.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/phy/phylink.c | 2 +- include/linux/phylink.h | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 92b8f4ab26f1..9c44a3581950 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1232,7 +1232,7 @@ int memac_initialization(struct mac_device *mac_dev, !of_property_read_bool(mac_node, "managed") && mac_dev->phy_if != PHY_INTERFACE_MODE_MII && !phy_interface_mode_is_rgmii(mac_dev->phy_if)) - mac_dev->phylink_config.ovr_an_inband = true; + mac_dev->phylink_config.default_an_inband = true; of_node_put(fixed); err = memac_init(mac_dev->fman_mac); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ca19b232431a..488b2fd2349c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1221,7 +1221,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) mdio_bus_data = priv->plat->mdio_bus_data; if (mdio_bus_data) - priv->phylink_config.ovr_an_inband = + priv->phylink_config.default_an_inband = mdio_bus_data->xpcs_an_inband; /* Set the platform/firmware specified interface mode. Note, phylink diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index c81f1c1ee675..02427378acfd 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -885,7 +885,7 @@ static int phylink_parse_mode(struct phylink *pl, const char *managed; unsigned long caps; - if (pl->config->ovr_an_inband) + if (pl->config->default_an_inband) pl->cfg_link_an_mode = MLO_AN_INBAND; dn = fwnode_get_named_child_node(fwnode, "fixed-link"); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 5ea6b2ad2396..a30a692acc32 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -141,7 +141,8 @@ enum phylink_op_type { * @mac_requires_rxc: if true, the MAC always requires a receive clock from PHY. * The PHY driver should start the clock signal as soon as * possible and avoid stopping it during suspend events. - * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND + * @default_an_inband: if true, defaults to MLO_AN_INBAND rather than + * MLO_AN_PHY. A fixed-link specification will override. * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx @@ -154,7 +155,7 @@ struct phylink_config { bool poll_fixed_state; bool mac_managed_pm; bool mac_requires_rxc; - bool ovr_an_inband; + bool default_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); DECLARE_PHY_INTERFACE_MASK(supported_interfaces); -- cgit v1.2.3 From 83f55b01dd903040d6ab64f4f9d78a27391a5916 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 May 2024 14:29:40 +0100 Subject: net: stmmac: rename xpcs_an_inband to default_an_inband Rename xpcs_an_inband to default_an_inband to reflect the change in phylink and its changed functionality. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/E1sCJN6-00EcrD-43@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 8 ++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- include/linux/stmmac.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 60283543ffc8..5e96146b8bd9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -248,7 +248,7 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data) dev_info(priv->device, "Link Speed Mode: 2.5Gbps\n"); priv->plat->max_speed = 2500; priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX; - priv->plat->mdio_bus_data->xpcs_an_inband = false; + priv->plat->mdio_bus_data->default_an_inband = false; } else { priv->plat->max_speed = 1000; } @@ -586,16 +586,16 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII || plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) { plat->mdio_bus_data->has_xpcs = true; - plat->mdio_bus_data->xpcs_an_inband = true; + plat->mdio_bus_data->default_an_inband = true; } - /* For fixed-link setup, we clear xpcs_an_inband */ + /* For fixed-link setup, we clear default_an_inband */ if (fwnode) { struct fwnode_handle *fixed_node; fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link"); if (fixed_node) - plat->mdio_bus_data->xpcs_an_inband = false; + plat->mdio_bus_data->default_an_inband = false; fwnode_handle_put(fixed_node); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 488b2fd2349c..bbedf2a8c60f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1222,7 +1222,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) mdio_bus_data = priv->plat->mdio_bus_data; if (mdio_bus_data) priv->phylink_config.default_an_inband = - mdio_bus_data->xpcs_an_inband; + mdio_bus_data->default_an_inband; /* Set the platform/firmware specified interface mode. Note, phylink * deals with the PHY interface mode, not the MAC interface mode. diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index f92c195c76ed..8f0f156d50d3 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -83,7 +83,7 @@ struct stmmac_priv; struct stmmac_mdio_bus_data { unsigned int phy_mask; unsigned int has_xpcs; - unsigned int xpcs_an_inband; + unsigned int default_an_inband; int *irqs; int probed_phy_irq; bool needs_reset; -- cgit v1.2.3 From 69c8b998717c03adeada070882512ebeae11d71f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 May 2024 09:29:22 -0700 Subject: net: qstat: extend kdoc about get_base_stats mlx5 has a dedicated queue for PTP packets. Clarify that this sort of queues can also be accounted towards the base. Reviewed-by: Joe Damato Link: https://lore.kernel.org/r/20240529162922.3690698-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index a8a7e48dfa6c..5ca019d294ca 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -62,6 +62,8 @@ struct netdev_queue_stats_tx { * statistics will not generally add up to the total number of events for * the device. The @get_base_stats callback allows filling in the delta * between events for currently live queues and overall device history. + * @get_base_stats can also be used to report any miscellaneous packets + * transferred outside of the main set of queues used by the networking stack. * When the statistics for the entire device are queried, first @get_base_stats * is issued to collect the delta, and then a series of per-queue callbacks. * Only statistics which are set in @get_base_stats will be reported -- cgit v1.2.3 From 668b6a2ef832a878494cc1b12a881c8ec0494b25 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:34 +0200 Subject: flow_dissector: add support for tunnel control flags Dissect [no]csum, [no]dontfrag, [no]oam, [no]crit flags from skb metadata. This is a prerequisite for matching these control flags using TC flower. Suggested-by: Ilya Maximets Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/net/flow_dissector.h | 9 +++++++++ include/net/ip_tunnels.h | 12 ++++++++++++ net/core/flow_dissector.c | 16 +++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 9ab376d1a677..99626475c3f4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -329,6 +329,14 @@ struct flow_dissector_key_cfm { #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 +/** + * struct flow_dissector_key_enc_flags: tunnel metadata control flags + * @flags: tunnel control flags + */ +struct flow_dissector_key_enc_flags { + u32 flags; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -363,6 +371,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ + FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..5a530d4fb02c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -247,6 +247,18 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) return ip_tunnel_flags_intersect(flags, present); } +static inline void ip_tunnel_set_encflags_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_CSUM_BIT, present); + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present); + __set_bit(IP_TUNNEL_OAM_BIT, present); + __set_bit(IP_TUNNEL_CRIT_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f82e9a7d3b37..59fe46077b3c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -382,7 +382,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS)) + FLOW_DISSECTOR_KEY_ENC_OPTS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_FLAGS)) return; info = skb_tunnel_info(skb); @@ -475,6 +477,18 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, IP_TUNNEL_GENEVE_OPT_BIT); enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? val : 0; } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_FLAGS)) { + struct flow_dissector_key_enc_flags *enc_flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = {}; + + enc_flags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_FLAGS, + target_container); + ip_tunnel_set_encflags_present(flags); + ip_tunnel_flags_and(flags, flags, info->key.tun_flags); + enc_flags->flags = bitmap_read(flags, IP_TUNNEL_CSUM_BIT, 32); + } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); -- cgit v1.2.3 From 1d17568e74dedbcb54d36af0662a15128297d681 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:35 +0200 Subject: net/sched: cls_flower: add support for matching tunnel control flags extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata. Suggested-by: Ilya Maximets Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_cls.h | 3 +++ net/sched/cls_flower.c | 56 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 229fc925ec3a..b6d38f5fd7c0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,6 +554,9 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fd9a6f20b60b..eef570c577ac 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -41,6 +41,12 @@ #define TCA_FLOWER_KEY_CT_FLAGS_MASK \ (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) +#define TUNNEL_FLAGS_PRESENT (\ + _BITUL(IP_TUNNEL_CSUM_BIT) | \ + _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ + _BITUL(IP_TUNNEL_OAM_BIT) | \ + _BITUL(IP_TUNNEL_CRIT_OPT_BIT)) + struct fl_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; @@ -75,6 +81,7 @@ struct fl_flow_key { struct flow_dissector_key_l2tpv3 l2tpv3; struct flow_dissector_key_ipsec ipsec; struct flow_dissector_key_cfm cfm; + struct flow_dissector_key_enc_flags enc_flags; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -732,6 +739,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), + [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), }; static const struct nla_policy @@ -1825,6 +1836,21 @@ static int fl_set_key_cfm(struct nlattr **tb, return 0; } +static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key, + u32 *flags_mask, struct netlink_ext_ack *extack) +{ + /* mask is mandatory for flags */ + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) { + NL_SET_ERR_MSG(extack, "missing enc_flags mask"); + return -EINVAL; + } + + *flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]); + *flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]); + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -2059,9 +2085,16 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (ret) return ret; - if (tb[TCA_FLOWER_KEY_FLAGS]) + if (tb[TCA_FLOWER_KEY_FLAGS]) { ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags, extack); + if (ret) + return ret; + } + + if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) + ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags, + &mask->enc_flags.flags, extack); return ret; } @@ -2175,6 +2208,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_IPSEC, ipsec); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CFM, cfm); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_FLAGS, enc_flags); skb_flow_dissector_init(dissector, keys, cnt); } @@ -3291,6 +3326,22 @@ err_cfm_opts: return err; } +static int fl_dump_key_enc_flags(struct sk_buff *skb, + struct flow_dissector_key_enc_flags *key, + struct flow_dissector_key_enc_flags *mask) +{ + if (!memchr_inv(mask, 0, sizeof(*mask))) + return 0; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS, key->flags)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS_MASK, mask->flags)) + return -EMSGSIZE; + + return 0; +} + static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3592,6 +3643,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) goto nla_put_failure; + if (fl_dump_key_enc_flags(skb, &key->enc_flags, &mask->enc_flags)) + goto nla_put_failure; + return 0; nla_put_failure: -- cgit v1.2.3 From 071115301838c6c265065dd5d6bf43a9a987a550 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:14 -0700 Subject: tcp: wrap mptcp and decrypted checks into tcp_skb_can_collapse_rx() tcp_skb_can_collapse() checks for conditions which don't make sense on input. Because of this we ended up sprinkling a few pairs of mptcp_skb_can_collapse() and skb_cmp_decrypted() calls on the input path. Group them in a new helper. This should make it less likely that someone will check mptcp and not decrypted or vice versa when adding new code. This implicitly adds a decrypted check early in tcp_collapse(). AFAIU this will very slightly increase our ability to collapse packets under memory pressure, not a real bug. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 7 +++++++ net/ipv4/tcp_input.c | 11 +++-------- net/ipv4/tcp_ipv4.c | 3 +-- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32815a40dea1..32741856da01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1071,6 +1071,13 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, skb_pure_zcopy_same(to, from)); } +static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to, + const struct sk_buff *from) +{ + return likely(mptcp_skb_can_collapse(to, from) && + !skb_cmp_decrypted(to, from)); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5aadf64e554d..212b6fd0caf7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4813,10 +4813,7 @@ static bool tcp_try_coalesce(struct sock *sk, if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; - if (!mptcp_skb_can_collapse(to, from)) - return false; - - if (skb_cmp_decrypted(from, to)) + if (!tcp_skb_can_collapse_rx(to, from)) return false; if (!skb_try_coalesce(to, from, fragstolen, &delta)) @@ -5372,7 +5369,7 @@ restart: break; } - if (n && n != tail && mptcp_skb_can_collapse(skb, n) && + if (n && n != tail && tcp_skb_can_collapse_rx(skb, n) && TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { end_of_skbs = false; break; @@ -5423,11 +5420,9 @@ restart: skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || - !mptcp_skb_can_collapse(nskb, skb) || + !tcp_skb_can_collapse_rx(nskb, skb) || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) goto end; - if (skb_cmp_decrypted(skb, nskb)) - goto end; } } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59d5b064f233..04044605cadf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2044,8 +2044,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || - !mptcp_skb_can_collapse(tail, skb) || - skb_cmp_decrypted(tail, skb) || + !tcp_skb_can_collapse_rx(tail, skb) || thtail->doff != th->doff || memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) goto no_coalesce; -- cgit v1.2.3 From 1be68a87ab333af37b02ad928a724a722a5a8203 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:15 -0700 Subject: tcp: add a helper for setting EOR on tail skb TLS (and hopefully soon PSP will) use EOR to prevent skbs with different decrypted state from getting merged, without adding new tests to the skb handling. In both cases once the connection switches to an "encrypted" state, all subsequent skbs will be encrypted, so a single "EOR fence" is sufficient to prevent mixing. Add a helper for setting the EOR bit, to make this arrangement more explicit. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 9 +++++++++ net/tls/tls_device.c | 11 ++--------- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32741856da01..08c3b99501cf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1066,6 +1066,7 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { + /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */ return likely(tcp_skb_can_collapse_to(to) && mptcp_skb_can_collapse(to, from) && skb_pure_zcopy_same(to, from)); @@ -2102,6 +2103,14 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc tcp_wmem_free_skb(sk, skb); } +static inline void tcp_write_collapse_fence(struct sock *sk) +{ + struct sk_buff *skb = tcp_write_queue_tail(sk); + + if (skb) + TCP_SKB_CB(skb)->eor = 1; +} + static inline void tcp_push_pending_frames(struct sock *sk) { if (tcp_send_head(sk)) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index ab6e694f7bc2..dc063c2c7950 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -231,14 +231,10 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { struct net_device *netdev; - struct sk_buff *skb; int err = 0; u8 *rcd_sn; - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; - + tcp_write_collapse_fence(sk); rcd_sn = tls_ctx->tx.rec_seq; trace_tls_device_tx_resync_send(sk, seq, rcd_sn); @@ -1067,7 +1063,6 @@ int tls_set_device_offload(struct sock *sk) struct tls_prot_info *prot; struct net_device *netdev; struct tls_context *ctx; - struct sk_buff *skb; char *iv, *rec_seq; int rc; @@ -1138,9 +1133,7 @@ int tls_set_device_offload(struct sock *sk) * SKBs where only part of the payload needs to be encrypted. * So mark the last skb in the write queue as end of record. */ - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; + tcp_write_collapse_fence(sk); /* Avoid offloading if the device is down * We don't want to offload new flows after -- cgit v1.2.3 From 61e2bbafb00e4b9a5de45e6448a7b6b818658576 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 31 May 2024 23:46:34 +0800 Subject: net: remove NULL-pointer net parameter in ip_metrics_convert When I was doing some experiments, I found that when using the first parameter, namely, struct net, in ip_metrics_convert() always triggers NULL pointer crash. Then I digged into this part, realizing that we can remove this one due to its uselessness. Signed-off-by: Jason Xing Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- include/net/tcp.h | 2 +- net/ipv4/fib_semantics.c | 5 ++--- net/ipv4/metrics.c | 8 ++++---- net/ipv4/tcp_cong.c | 11 +++++------ net/ipv6/route.c | 2 +- 6 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 6d735e00d3f3..c5606cadb1a5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -506,8 +506,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } -struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, - int fc_mx_len, +struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 08c3b99501cf..a70fc39090fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1224,7 +1224,7 @@ extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find(const char *name); struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f669da98d11d..7b6b042208bd 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1030,7 +1030,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) bool ecn_ca = false; nla_strscpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); } else { if (nla_len(nla) != sizeof(u32)) return false; @@ -1459,8 +1459,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); if (!fi) goto failure; - fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, - cfg->fc_mx_len, extack); + fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(fi->fib_metrics)) { err = PTR_ERR(fi->fib_metrics); kfree(fi); diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 0e3ee1532848..8ddac1f595ed 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -7,7 +7,7 @@ #include #include -static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, +static int ip_metrics_convert(struct nlattr *fc_mx, int fc_mx_len, u32 *metrics, struct netlink_ext_ack *extack) { @@ -31,7 +31,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, char tmp[TCP_CA_NAME_MAX]; nla_strscpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) { NL_SET_ERR_MSG(extack, "Unknown tcp congestion algorithm"); return -EINVAL; @@ -63,7 +63,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, return 0; } -struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, +struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack) { @@ -77,7 +77,7 @@ struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, if (unlikely(!fib_metrics)) return ERR_PTR(-ENOMEM); - err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics, + err = ip_metrics_convert(fc_mx, fc_mx_len, fib_metrics->metrics, extack); if (!err) { refcount_set(&fib_metrics->refcnt, 1); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 28ffcfbeef14..48617d99abb0 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -46,8 +46,7 @@ void tcp_set_ca_state(struct sock *sk, const u8 ca_state) } /* Must be called with rcu lock held */ -static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, - const char *name) +static struct tcp_congestion_ops *tcp_ca_find_autoload(const char *name) { struct tcp_congestion_ops *ca = tcp_ca_find(name); @@ -178,7 +177,7 @@ int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_cong return ret; } -u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; u32 key = TCP_CA_UNSPEC; @@ -186,7 +185,7 @@ u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) might_sleep(); rcu_read_lock(); - ca = tcp_ca_find_autoload(net, name); + ca = tcp_ca_find_autoload(name); if (ca) { key = ca->key; *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; @@ -283,7 +282,7 @@ int tcp_set_default_congestion_control(struct net *net, const char *name) int ret; rcu_read_lock(); - ca = tcp_ca_find_autoload(net, name); + ca = tcp_ca_find_autoload(name); if (!ca) { ret = -ENOENT; } else if (!bpf_try_module_get(ca, ca->owner)) { @@ -421,7 +420,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, if (!load) ca = tcp_ca_find(name); else - ca = tcp_ca_find_autoload(sock_net(sk), name); + ca = tcp_ca_find_autoload(name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a504b88ec06b..12f647d0fec0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3761,7 +3761,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (!rt) goto out; - rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len, + rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(rt->fib6_metrics)) { err = PTR_ERR(rt->fib6_metrics); -- cgit v1.2.3 From 6f49c3fb563c0a95a838216eaf7d9b02ece44bf5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 1 Jun 2024 00:29:17 +0100 Subject: net: caif: remove unused structs 'cfpktq' has been unused since commit 73d6ac633c6c ("caif: code cleanup"). 'caif_packet_funcs' is declared but never defined. Remove both of them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- include/net/caif/caif_layer.h | 2 -- net/caif/cfpkt_skbuff.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 51f7bb42a936..0f45d875905f 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -11,9 +11,7 @@ struct cflayer; struct cfpkt; -struct cfpktq; struct caif_payload_info; -struct caif_packet_funcs; #define CAIF_LAYER_NAME_SZ 16 diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 7796414d47e5..2ae8cfa3df88 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -21,13 +21,6 @@ do { \ pr_warn(errmsg); \ } while (0) -struct cfpktq { - struct sk_buff_head head; - atomic_t count; - /* Lock protects count updates */ - spinlock_t lock; -}; - /* * net/caif/ is generic and does not * understand SKB, so we do this typecast -- cgit v1.2.3 From 82dc29b9737edf2d13561ebcf6212c0b88c41129 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Jun 2024 16:18:52 +0200 Subject: devlink: Constify the 'table_ops' parameter of devl_dpipe_table_register() "struct devlink_dpipe_table_ops" only contains some function pointers. Update "struct devlink_dpipe_table" and the 'table_ops' parameter of devl_dpipe_table_register() so that structures in drivers can be constified. Constifying these structures will move some data to a read-only section, so increase overall security. Signed-off-by: Christophe JAILLET Reviewed-by: Wojciech Drewek Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++-- net/devlink/dpipe.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 35eb0f884386..db5eff6cb60f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -352,7 +352,7 @@ struct devlink_dpipe_table { bool resource_valid; u64 resource_id; u64 resource_units; - struct devlink_dpipe_table_ops *table_ops; + const struct devlink_dpipe_table_ops *table_ops; struct rcu_head rcu; }; @@ -1751,7 +1751,7 @@ void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, - struct devlink_dpipe_table_ops *table_ops, + const struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern); void devl_dpipe_table_unregister(struct devlink *devlink, const char *table_name); diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c index a72a9292efc5..55009b377447 100644 --- a/net/devlink/dpipe.c +++ b/net/devlink/dpipe.c @@ -839,7 +839,7 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); */ int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, - struct devlink_dpipe_table_ops *table_ops, + const struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; -- cgit v1.2.3 From f086edef71be7174a16c1ed67ac65a085cda28b1 Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Mon, 3 Jun 2024 21:30:54 +0000 Subject: tcp: add sysctl_tcp_rto_min_us Adding a sysctl knob to allow user to specify a default rto_min at socket init time, other than using the hard coded 200ms default rto_min. Note that the rto_min route option has the highest precedence for configuring this setting, followed by the TCP_BPF_RTO_MIN socket option, followed by the tcp_rto_min_us sysctl. Signed-off-by: Kevin Yang Reviewed-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Tony Lu Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 13 +++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bd50df6a5a42..6e99eccdb837 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1196,6 +1196,19 @@ tcp_pingpong_thresh - INTEGER Default: 1 +tcp_rto_min_us - INTEGER + Minimal TCP retransmission timeout (in microseconds). Note that the + rto_min route option has the highest precedence for configuring this + setting, followed by the TCP_BPF_RTO_MIN socket option, followed by + this tcp_rto_min_us sysctl. + + The recommended practice is to use a value less or equal to 200000 + microseconds. + + Possible Values: 1 - INT_MAX + + Default: 200000 + UDP variables ============= diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c356c458b340..a91bb971f901 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -170,6 +170,7 @@ struct netns_ipv4 { u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; + int sysctl_tcp_rto_min_us; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d7892f34a15b..bb64c0ef092d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1503,6 +1503,14 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE, }, + { + .procname = "tcp_rto_min_us", + .data = &init_net.ipv4.sysctl_tcp_rto_min_us, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, }; static __net_init int ipv4_sysctl_init_net(struct net *net) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5fa68e7f6ddb..fa43aaacd92b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -420,6 +420,7 @@ void tcp_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + int rto_min_us; tp->out_of_order_queue = RB_ROOT; sk->tcp_rtx_queue = RB_ROOT; @@ -428,7 +429,8 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; - icsk->icsk_rto_min = TCP_RTO_MIN; + rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us); + icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us); icsk->icsk_delack_max = TCP_DELACK_MAX; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3ef4b274c24b..3613e08ca794 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3502,6 +3502,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_shrink_window = 0; net->ipv4.sysctl_tcp_pingpong_thresh = 1; + net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN); return 0; } -- cgit v1.2.3 From 758191c9ea7bcc45dd99398a538ae4ab27c4029e Mon Sep 17 00:00:00 2001 From: Yoray Zack Date: Tue, 4 Jun 2024 00:22:17 +0300 Subject: net/mlx5e: SHAMPO, Use KSMs instead of KLMs KSM Mkey is KLM Mkey with a fixed buffer size. Due to this fact, it is a faster mechanism than KLM. SHAMPO feature used KLMs Mkeys for memory mappings of its headers buffer. As it used KLMs with the same buffer size for each entry, we can use KSMs instead. This commit changes the Mkeys that map the SHAMPO headers buffer from KLMs to KSMs. Signed-off-by: Yoray Zack Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240603212219.1037656-13-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 20 +------ .../net/ethernet/mellanox/mlx5/core/en/params.c | 12 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 19 +++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 21 ++++--- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 65 +++++++++++----------- include/linux/mlx5/device.h | 1 + 6 files changed, 71 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ff326601d4a4..bec784d25d7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -80,6 +80,7 @@ struct page_pool; SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MLX5E_RX_MAX_HEAD (256) +#define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8) #define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9) #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) #define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64) @@ -146,25 +147,6 @@ struct page_pool; #define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ -#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\ - (sizeof(struct mlx5e_umr_wqe) +\ - (sizeof(struct mlx5_klm) * (sgl_len))) - -#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \ - (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB)) - -#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\ - (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS)) - -#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\ - (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm)) - -#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ - ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT) - -#define MLX5E_MAX_KLM_PER_WQE(mdev) \ - MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) - #define mlx5e_state_dereference(priv, p) \ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index ec819dfc98be..6c9ccccca81e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1071,18 +1071,18 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param) { - int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest; + int max_num_of_umr_per_wqe, max_hd_per_wqe, max_ksm_per_umr, rest; void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); u32 wqebbs; - max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); + max_ksm_per_umr = MLX5E_MAX_KSM_PER_WQE(mdev); max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); - max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; - rest = max_hd_per_wqe % max_klm_per_umr; - wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe; + max_num_of_umr_per_wqe = max_hd_per_wqe / max_ksm_per_umr; + rest = max_hd_per_wqe % max_ksm_per_umr; + wqebbs = MLX5E_KSM_UMR_WQEBBS(max_ksm_per_umr) * max_num_of_umr_per_wqe; if (rest) - wqebbs += MLX5E_KLM_UMR_WQEBBS(rest); + wqebbs += MLX5E_KSM_UMR_WQEBBS(rest); wqebbs *= wq_size; return wqebbs; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 879d698b6119..d1f0f868d494 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -34,6 +34,25 @@ #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) +#define MLX5E_KSM_UMR_WQE_SZ(sgl_len)\ + (sizeof(struct mlx5e_umr_wqe) +\ + (sizeof(struct mlx5_ksm) * (sgl_len))) + +#define MLX5E_KSM_UMR_WQEBBS(ksm_entries) \ + (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_BB)) + +#define MLX5E_KSM_UMR_DS_CNT(ksm_entries)\ + (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_DS)) + +#define MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size)\ + (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_ksm)) + +#define MLX5E_KSM_ENTRIES_PER_WQE(wqe_size)\ + ALIGN_DOWN(MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT) + +#define MLX5E_MAX_KSM_PER_WQE(mdev) \ + MLX5E_KSM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) + static inline ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d21a87ddc934..2a3e0de51f0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -504,8 +504,8 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, return err; } -static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, - u64 nentries, +static int mlx5e_create_umr_ksm_mkey(struct mlx5_core_dev *mdev, + u64 nentries, u8 log_entry_size, u32 *umr_mkey) { int inlen; @@ -525,12 +525,13 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KSM); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET(mkc, mkc, translations_octword_size, nentries); - MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, log_page_size, log_entry_size); + MLX5_SET64(mkc, mkc, len, nentries << log_entry_size); err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); kvfree(in); @@ -565,14 +566,16 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { - u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); - if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) { - mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", - max_klm_size, rq->mpwqe.shampo->hd_per_wq); + if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) { + mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", + max_ksm_size, rq->mpwqe.shampo->hd_per_wq); return -EINVAL; } - return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, + + return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, &rq->mpwqe.shampo->mkey); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3af4f70de334..f1fbf60d0356 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -619,25 +619,25 @@ static int bitmap_find_window(unsigned long *bitmap, int len, return min(len, count); } -static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, - __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs) +static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, + __be32 key, u16 offset, u16 ksm_len) { - memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms)); + memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_ksms)); umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); umr_wqe->ctrl.umr_mkey = key; umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) - | MLX5E_KLM_UMR_DS_CNT(klm_len)); + | MLX5E_KSM_UMR_DS_CNT(ksm_len)); umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); - umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len); + umr_wqe->uctrl.xlt_octowords = cpu_to_be16(ksm_len); umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, struct mlx5e_icosq *sq, - u16 klm_entries, u16 index) + u16 ksm_entries, u16 index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 entries, pi, header_offset, err, wqe_bbs, new_entries; @@ -650,20 +650,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, int headroom, i; headroom = rq->buff.headroom; - new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); - entries = ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); - wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries); + new_entries = ksm_entries - (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1)); + entries = ALIGN(ksm_entries, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT); + wqe_bbs = MLX5E_KSM_UMR_WQEBBS(entries); pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); - build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); + build_ksm_umr(sq, umr_wqe, shampo->key, index, entries); frag_page = &shampo->pages[page_index]; for (i = 0; i < entries; i++, index++) { dma_info = &shampo->info[index]; - if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < - MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)) - goto update_klm; + if (i >= ksm_entries || (index < shampo->pi && shampo->pi - index < + MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT)) + goto update_ksm; header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; if (!(header_offset & (PAGE_SIZE - 1))) { @@ -683,12 +683,11 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, dma_info->frag_page = frag_page; } -update_klm: - umr_wqe->inline_klms[i].bcount = - cpu_to_be32(MLX5E_RX_MAX_HEAD); - umr_wqe->inline_klms[i].key = cpu_to_be32(lkey); - umr_wqe->inline_klms[i].va = - cpu_to_be64(dma_info->addr + headroom); +update_ksm: + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(lkey), + .va = cpu_to_be64(dma_info->addr + headroom), + }; } sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { @@ -720,37 +719,37 @@ err_unmap: static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - u16 klm_entries, num_wqe, index, entries_before; + u16 ksm_entries, num_wqe, index, entries_before; struct mlx5e_icosq *sq = rq->icosq; - int i, err, max_klm_entries, len; + int i, err, max_ksm_entries, len; - max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); - klm_entries = bitmap_find_window(shampo->bitmap, + max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev); + ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); - if (!klm_entries) + if (!ksm_entries) return 0; - klm_entries += (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); - index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); + ksm_entries += (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1)); + index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT); entries_before = shampo->hd_per_wq - index; - if (unlikely(entries_before < klm_entries)) - num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) + - DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries); + if (unlikely(entries_before < ksm_entries)) + num_wqe = DIV_ROUND_UP(entries_before, max_ksm_entries) + + DIV_ROUND_UP(ksm_entries - entries_before, max_ksm_entries); else - num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries); + num_wqe = DIV_ROUND_UP(ksm_entries, max_ksm_entries); for (i = 0; i < num_wqe; i++) { - len = (klm_entries > max_klm_entries) ? max_klm_entries : - klm_entries; + len = (ksm_entries > max_ksm_entries) ? max_ksm_entries : + ksm_entries; if (unlikely(index + len > shampo->hd_per_wq)) len = shampo->hd_per_wq - index; err = mlx5e_build_shampo_hd_umr(rq, sq, len, index); if (unlikely(err)) return err; index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1); - klm_entries -= len; + ksm_entries -= len; } return 0; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index d7bb31d9a446..da09bfaa7b81 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -294,6 +294,7 @@ enum { #define MLX5_UMR_FLEX_ALIGNMENT 0x40 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) +#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) -- cgit v1.2.3 From 99be56171fa9ffea494dfe3d4a7f6e7e51630c2e Mon Sep 17 00:00:00 2001 From: Yoray Zack Date: Tue, 4 Jun 2024 00:22:18 +0300 Subject: net/mlx5e: SHAMPO, Re-enable HW-GRO Add back HW-GRO to the reported features. As the current implementation of HW-GRO uses KSMs with a specific fixed buffer size (256B) to map its headers buffer, we reported the feature only if the NIC is supporting KSM and the minimum value for buffer size is below the requested one. iperf3 bandwidth comparison: +---------+--------+--------+-----------+ | streams | SW GRO | HW GRO | Unit | |---------+--------+--------+-----------| | 1 | 36 | 42 | Gbits/sec | | 4 | 34 | 39 | Gbits/sec | | 8 | 31 | 35 | Gbits/sec | +---------+--------+--------+-----------+ A downstream patch will add skb fragment coalescing which will improve performance considerably. Benchmark details: VM based setup CPU: Intel(R) Xeon(R) Platinum 8380 CPU, 24 cores NIC: ConnectX-7 100GbE iperf3 and irq running on same CPU over a single receive queue Signed-off-by: Yoray Zack Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240603212219.1037656-14-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 26 +++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 16 +++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2a3e0de51f0e..44a64d062e42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -74,6 +74,27 @@ #include "lib/devcom.h" #include "lib/sd.h" +static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, shampo)) + return false; + + /* Our HW-GRO implementation relies on "KSM Mkey" for + * SHAMPO headers buffer mapping + */ + if (!MLX5_CAP_GEN(mdev, fixed_buffer_size)) + return false; + + if (!MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer_valid)) + return false; + + if (MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer) > + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE) + return false; + + return true; +} + bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) { @@ -5331,6 +5352,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + if (mlx5e_hw_gro_supported(mdev) && + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + netdev->hw_features |= NETIF_F_GRO_HW; + if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..17acd0f3ca8e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1526,8 +1526,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ts_cqe_to_dest_cqn[0x1]; u8 reserved_at_b3[0x6]; u8 go_back_n[0x1]; - u8 shampo[0x1]; - u8 reserved_at_bb[0x5]; + u8 reserved_at_ba[0x6]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -1744,7 +1743,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_2a0[0x10]; + u8 reserved_at_2a0[0xb]; + u8 shampo[0x1]; + u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; u8 max_flow_counter_31_16[0x10]; @@ -2017,7 +2018,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_250[0x10]; u8 reserved_at_260[0x120]; - u8 reserved_at_380[0x10]; + u8 reserved_at_380[0xb]; + u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; u8 reserved_at_3a0[0x10]; @@ -2029,7 +2031,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 pcc_ifa2[0x1]; u8 reserved_at_3f1[0xf]; - u8 reserved_at_400[0x400]; + u8 reserved_at_400[0x1]; + u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1]; + u8 reserved_at_402[0x1e]; + + u8 reserved_at_420[0x3e0]; }; enum mlx5_ifc_flow_destination_type { -- cgit v1.2.3 From b4cb4a1391dcdc640c4ade003aaf0ee19cc8d509 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Jun 2024 11:16:03 +0000 Subject: net: use unrcu_pointer() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toke mentioned unrcu_pointer() existence, allowing to remove some of the ugly casts we have when using xchg() for rcu protected pointers. Also make inet_rcv_compat const. Signed-off-by: Eric Dumazet Cc: Toke Høiland-Jørgensen Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240604111603.45871-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 2 +- net/core/gen_estimator.c | 2 +- net/core/sock_diag.c | 8 +++----- net/ipv4/cipso_ipv4.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 7 ++++--- net/ipv4/udp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ipv6_sockglue.c | 3 +-- net/ipv6/route.c | 6 +++--- net/sched/act_api.c | 2 +- 12 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 953c8dc4e259..b30ea0c342a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2095,7 +2095,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); - old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); + old_dst = unrcu_pointer(xchg(&sk->sk_dst_cache, RCU_INITIALIZER(dst))); dst_release(old_dst); } diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index fae9c4694186..412816076b8b 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -206,7 +206,7 @@ void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est) { struct net_rate_estimator *est; - est = xchg((__force struct net_rate_estimator **)rate_est, NULL); + est = unrcu_pointer(xchg(rate_est, NULL)); if (est) { timer_shutdown_sync(&est->timer); kfree_rcu(est, rcu); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 654122838025..a08eed9b9142 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -18,7 +18,7 @@ static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX]; -static struct sock_diag_inet_compat __rcu *inet_rcv_compat; +static const struct sock_diag_inet_compat __rcu *inet_rcv_compat; static struct workqueue_struct *broadcast_wq; @@ -187,8 +187,7 @@ void sock_diag_broadcast_destroy(struct sock *sk) void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr) { - xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, - ptr); + xchg(&inet_rcv_compat, RCU_INITIALIZER(ptr)); } EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat); @@ -196,8 +195,7 @@ void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr) { const struct sock_diag_inet_compat *old; - old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, - NULL); + old = unrcu_pointer(xchg(&inet_rcv_compat, NULL)); WARN_ON_ONCE(old != ptr); } EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index dd6d46015058..3a95c0f13ce3 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1953,7 +1953,7 @@ int cipso_v4_req_setattr(struct request_sock *req, buf = NULL; req_inet = inet_rsk(req); - opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); + opt = unrcu_pointer(xchg(&req_inet->ireq_opt, RCU_INITIALIZER(opt))); if (opt) kfree_rcu(opt, rcu); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fa43aaacd92b..f727bc8d82a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3081,7 +3081,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL)); + dst_release(unrcu_pointer(xchg(&sk->sk_rx_dst, NULL))); tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->segs_in = 0; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8ed54e7334a9..0f523cbfe329 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -49,7 +49,7 @@ void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; - ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL); + ctxt = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, NULL)); if (ctxt) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); @@ -80,9 +80,10 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; - octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx); + octx = unrcu_pointer(xchg(&q->ctx, RCU_INITIALIZER(ctx))); } else { - octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx); + octx = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, + RCU_INITIALIZER(ctx))); } if (octx) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 189c9113fe9a..c9ca6d285347 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2230,7 +2230,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old; if (dst_hold_safe(dst)) { - old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); + old = unrcu_pointer(xchg(&sk->sk_rx_dst, RCU_INITIALIZER(dst))); dst_release(old); return old != dst; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8041dc181bd4..e03fb9a1dbeb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -509,7 +509,7 @@ void inet6_cleanup_sock(struct sock *sk) /* Free tx options */ - opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + opt = unrcu_pointer(xchg(&np->opt, NULL)); if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 31d77885bcae..15f9abe50656 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -984,7 +984,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) { struct fib6_info *from; - from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); + from = unrcu_pointer(xchg(&pcpu_rt->from, NULL)); fib6_info_release(from); } } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d4c28ec1bc51..cd342d5015c6 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -111,8 +111,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, - opt); + opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt))); sk_dst_reset(sk); return opt; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 12f647d0fec0..ae6fbdf8995d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -368,7 +368,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - from = xchg((__force struct fib6_info **)&rt->from, NULL); + from = unrcu_pointer(xchg(&rt->from, NULL)); fib6_info_release(from); } @@ -1437,7 +1437,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net, if (res->f6i->fib6_destroying) { struct fib6_info *from; - from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); + from = unrcu_pointer(xchg(&pcpu_rt->from, NULL)); fib6_info_release(from); } @@ -1466,7 +1466,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); + from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL)); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9ee622fb1160..7458b3154426 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -62,7 +62,7 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, { struct tc_cookie *old; - old = xchg((__force struct tc_cookie **)old_cookie, new_cookie); + old = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie))); if (old) call_rcu(&old->rcu, tcf_free_cookie_rcu); } -- cgit v1.2.3 From c34506406dd5cfb352f8c53bb6a1b9535c0905dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:51 +0000 Subject: tcp: small changes in reqsk_put() and reqsk_free() In reqsk_free(), use DEBUG_NET_WARN_ON_ONCE() instead of WARN_ON_ONCE() for a condition which never fired. In reqsk_put() directly call __reqsk_free(), there is no point checking rsk_refcnt again right after a transition to zero. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index ebcb8896bffc..a8f82216c628 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -172,14 +172,14 @@ static inline void __reqsk_free(struct request_sock *req) static inline void reqsk_free(struct request_sock *req) { - WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); + DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); __reqsk_free(req); } static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) - reqsk_free(req); + __reqsk_free(req); } /* -- cgit v1.2.3 From 6971d21672827a701c5ea180891b7ea6cf06f6a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:53 +0000 Subject: tcp: move reqsk_alloc() to inet_connection_sock.c reqsk_alloc() has a single caller, no need to expose it in include/net/request_sock.h. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 33 --------------------------------- net/ipv4/inet_connection_sock.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a8f82216c628..b07b1cd14e9f 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -128,39 +128,6 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, return sk; } -static inline struct request_sock * -reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, - bool attach_listener) -{ - struct request_sock *req; - - req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); - if (!req) - return NULL; - req->rsk_listener = NULL; - if (attach_listener) { - if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { - kmem_cache_free(ops->slab, req); - return NULL; - } - req->rsk_listener = sk_listener; - } - req->rsk_ops = ops; - req_to_sk(req)->sk_prot = sk_listener->sk_prot; - sk_node_init(&req_to_sk(req)->sk_node); - sk_tx_queue_clear(req_to_sk(req)); - req->saved_syn = NULL; - req->syncookie = 0; - req->timeout = 0; - req->num_timeout = 0; - req->num_retrans = 0; - req->sk = NULL; - refcount_set(&req->rsk_refcnt, 0); - - return req; -} -#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) - static inline void __reqsk_free(struct request_sock *req) { req->rsk_ops->destructor(req); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a9d2e6308910..7ced569778ab 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -911,6 +911,39 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); +static struct request_sock * +reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, + bool attach_listener) +{ + struct request_sock *req; + + req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); + if (!req) + return NULL; + req->rsk_listener = NULL; + if (attach_listener) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { + kmem_cache_free(ops->slab, req); + return NULL; + } + req->rsk_listener = sk_listener; + } + req->rsk_ops = ops; + req_to_sk(req)->sk_prot = sk_listener->sk_prot; + sk_node_init(&req_to_sk(req)->sk_node); + sk_tx_queue_clear(req_to_sk(req)); + req->saved_syn = NULL; + req->syncookie = 0; + req->timeout = 0; + req->num_timeout = 0; + req->num_retrans = 0; + req->sk = NULL; + refcount_set(&req->rsk_refcnt, 0); + + return req; +} +#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) + struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) -- cgit v1.2.3 From 97d833ceb27dc19f8777d63f90be4a27b5daeedf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Jun 2024 16:49:42 +0200 Subject: mlxsw: spectrum_acl_erp: Fix object nesting warning ACLs in Spectrum-2 and newer ASICs can reside in the algorithmic TCAM (A-TCAM) or in the ordinary circuit TCAM (C-TCAM). The former can contain more ACLs (i.e., tc filters), but the number of masks in each region (i.e., tc chain) is limited. In order to mitigate the effects of the above limitation, the device allows filters to share a single mask if their masks only differ in up to 8 consecutive bits. For example, dst_ip/25 can be represented using dst_ip/24 with a delta of 1 bit. The C-TCAM does not have a limit on the number of masks being used (and therefore does not support mask aggregation), but can contain a limited number of filters. The driver uses the "objagg" library to perform the mask aggregation by passing it objects that consist of the filter's mask and whether the filter is to be inserted into the A-TCAM or the C-TCAM since filters in different TCAMs cannot share a mask. The set of created objects is dependent on the insertion order of the filters and is not necessarily optimal. Therefore, the driver will periodically ask the library to compute a more optimal set ("hints") by looking at all the existing objects. When the library asks the driver whether two objects can be aggregated the driver only compares the provided masks and ignores the A-TCAM / C-TCAM indication. This is the right thing to do since the goal is to move as many filters as possible to the A-TCAM. The driver also forbids two identical masks from being aggregated since this can only happen if one was intentionally put in the C-TCAM to avoid a conflict in the A-TCAM. The above can result in the following set of hints: H1: {mask X, A-TCAM} -> H2: {mask Y, A-TCAM} // X is Y + delta H3: {mask Y, C-TCAM} -> H4: {mask Z, A-TCAM} // Y is Z + delta After getting the hints from the library the driver will start migrating filters from one region to another while consulting the computed hints and instructing the device to perform a lookup in both regions during the transition. Assuming a filter with mask X is being migrated into the A-TCAM in the new region, the hints lookup will return H1. Since H2 is the parent of H1, the library will try to find the object associated with it and create it if necessary in which case another hints lookup (recursive) will be performed. This hints lookup for {mask Y, A-TCAM} will either return H2 or H3 since the driver passes the library an object comparison function that ignores the A-TCAM / C-TCAM indication. This can eventually lead to nested objects which are not supported by the library [1]. Fix by removing the object comparison function from both the driver and the library as the driver was the only user. That way the lookup will only return exact matches. I do not have a reliable reproducer that can reproduce the issue in a timely manner, but before the fix the issue would reproduce in several minutes and with the fix it does not reproduce in over an hour. Note that the current usefulness of the hints is limited because they include the C-TCAM indication and represent aggregation that cannot actually happen. This will be addressed in net-next. [1] WARNING: CPU: 0 PID: 153 at lib/objagg.c:170 objagg_obj_parent_assign+0xb5/0xd0 Modules linked in: CPU: 0 PID: 153 Comm: kworker/0:18 Not tainted 6.9.0-rc6-custom-g70fbc2c1c38b #42 Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:objagg_obj_parent_assign+0xb5/0xd0 [...] Call Trace: __objagg_obj_get+0x2bb/0x580 objagg_obj_get+0xe/0x80 mlxsw_sp_acl_erp_mask_get+0xb5/0xf0 mlxsw_sp_acl_atcam_entry_add+0xe8/0x3c0 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0 mlxsw_sp_acl_tcam_vchunk_migrate_one+0x16b/0x270 mlxsw_sp_acl_tcam_vregion_rehash_work+0xbe/0x510 process_one_work+0x151/0x370 Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation") Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Tested-by: Alexander Zubkov Signed-off-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c | 13 ------------- include/linux/objagg.h | 1 - lib/objagg.c | 15 --------------- 3 files changed, 29 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index d231f4d2888b..9eee229303cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1217,18 +1217,6 @@ static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj, return err ? false : true; } -static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2) -{ - const struct mlxsw_sp_acl_erp_key *key1 = obj1; - const struct mlxsw_sp_acl_erp_key *key2 = obj2; - - /* For hints purposes, two objects are considered equal - * in case the masks are the same. Does not matter what - * the "ctcam" value is. - */ - return memcmp(key1->mask, key2->mask, sizeof(key1->mask)); -} - static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj, void *obj) { @@ -1308,7 +1296,6 @@ static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv) static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { .obj_size = sizeof(struct mlxsw_sp_acl_erp_key), .delta_check = mlxsw_sp_acl_erp_delta_check, - .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp, .delta_create = mlxsw_sp_acl_erp_delta_create, .delta_destroy = mlxsw_sp_acl_erp_delta_destroy, .root_create = mlxsw_sp_acl_erp_root_create, diff --git a/include/linux/objagg.h b/include/linux/objagg.h index 78021777df46..6df5b887dc54 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -8,7 +8,6 @@ struct objagg_ops { size_t obj_size; bool (*delta_check)(void *priv, const void *parent_obj, const void *obj); - int (*hints_obj_cmp)(const void *obj1, const void *obj2); void * (*delta_create)(void *priv, void *parent_obj, void *obj); void (*delta_destroy)(void *priv, void *delta_priv); void * (*root_create)(void *priv, void *obj, unsigned int root_id); diff --git a/lib/objagg.c b/lib/objagg.c index 0f99ea5f5371..363e43e849ac 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -906,20 +906,6 @@ static const struct objagg_opt_algo *objagg_opt_algos[] = { [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, }; -static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, - const void *obj) -{ - struct rhashtable *ht = arg->ht; - struct objagg_hints *objagg_hints = - container_of(ht, struct objagg_hints, node_ht); - const struct objagg_ops *ops = objagg_hints->ops; - const char *ptr = obj; - - ptr += ht->p.key_offset; - return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) : - memcmp(ptr, arg->key, ht->p.key_len); -} - /** * objagg_hints_get - obtains hints instance * @objagg: objagg instance @@ -958,7 +944,6 @@ struct objagg_hints *objagg_hints_get(struct objagg *objagg, offsetof(struct objagg_hints_node, obj); objagg_hints->ht_params.head_offset = offsetof(struct objagg_hints_node, ht_node); - objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); if (err) -- cgit v1.2.3 From b334b924c9b709bc969644fb5c406f5c9d01dceb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 6 Jun 2024 17:11:37 +0200 Subject: net: tcp/dccp: prepare for tw_timer un-pinning The TCP timewait timer is proving to be problematic for setups where scheduler CPU isolation is achieved at runtime via cpusets (as opposed to statically via isolcpus=domains). What happens there is a CPU goes through tcp_time_wait(), arming the time_wait timer, then gets isolated. TCP_TIMEWAIT_LEN later, the timer fires, causing interference for the now-isolated CPU. This is conceptually similar to the issue described in commit e02b93124855 ("workqueue: Unbind kworkers before sending them to exit()") Move inet_twsk_schedule() to within inet_twsk_hashdance(), with the ehash lock held. Expand the lock's critical section from inet_twsk_kill() to inet_twsk_deschedule_put(), serializing the scheduling vs descheduling of the timer. IOW, this prevents the following race: tcp_time_wait() inet_twsk_hashdance() inet_twsk_deschedule_put() del_timer_sync() inet_twsk_schedule() Thanks to Paolo Abeni for suggesting to leverage the ehash lock. This also restores a comment from commit ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance") as inet_twsk_hashdance() had a "Step 1" and "Step 3" comment, but the "Step 2" had gone missing. inet_twsk_deschedule_put() now acquires the ehash spinlock to synchronize with inet_twsk_hashdance_schedule(). To ease possible regression search, actual un-pin is done in next patch. Link: https://lore.kernel.org/all/ZPhpfMjSiHVjQkTk@localhost.localdomain/ Reviewed-by: Eric Dumazet Signed-off-by: Valentin Schneider Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 6 +++-- net/dccp/minisocks.c | 3 +-- net/ipv4/inet_timewait_sock.c | 52 ++++++++++++++++++++++++++++++++++------ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 3 +-- 5 files changed, 52 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 2a536eea9424..5b43d220243d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -93,8 +93,10 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state); -void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo); +void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo, + int timeo); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 251a57cf5822..deb52d7d31b4 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -59,11 +59,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) * we complete the initialization. */ local_bh_disable(); - inet_twsk_schedule(tw, timeo); /* Linkage updates. * Note that access to tw after this point is illegal. */ - inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index e28075f0006e..628d33a41ce5 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -96,9 +96,13 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. + * + * The caller must not access @tw anymore after this function returns. */ -void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo) +void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo, + int timeo) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -129,26 +133,33 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); + /* Step 2: Hash TW into tcp ehash chain */ inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock(lock); + /* Ensure above writes are committed into memory before updating the + * refcount. + * Provides ordering vs later refcount_inc(). + */ + smp_wmb(); /* tw_refcnt is set to 3 because we have : * - one reference for bhash chain. * - one reference for ehash chain. * - one reference for timer. - * We can use atomic_set() because prior spin_lock()/spin_unlock() - * committed into memory all tw fields. * Also note that after this point, we lost our implicit reference * so we are not allowed to use tw anymore. */ refcount_set(&tw->tw_refcnt, 3); + + inet_twsk_schedule(tw, timeo); + + spin_unlock(lock); } -EXPORT_SYMBOL_GPL(inet_twsk_hashdance); +EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule); static void tw_timer_handler(struct timer_list *t) { @@ -217,7 +228,34 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc); */ void inet_twsk_deschedule_put(struct inet_timewait_sock *tw) { - if (del_timer_sync(&tw->tw_timer)) + struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + + /* inet_twsk_purge() walks over all sockets, including tw ones, + * and removes them via inet_twsk_deschedule_put() after a + * refcount_inc_not_zero(). + * + * inet_twsk_hashdance_schedule() must (re)init the refcount before + * arming the timer, i.e. inet_twsk_purge can obtain a reference to + * a twsk that did not yet schedule the timer. + * + * The ehash lock synchronizes these two: + * After acquiring the lock, the timer is always scheduled (else + * timer_shutdown returns false), because hashdance_schedule releases + * the ehash lock only after completing the timer initialization. + * + * Without grabbing the ehash lock, we get: + * 1) cpu x sets twsk refcount to 3 + * 2) cpu y bumps refcount to 4 + * 3) cpu y calls inet_twsk_deschedule_put() and shuts timer down + * 4) cpu x tries to start timer, but mod_timer is a noop post-shutdown + * -> timer refcount is never decremented. + */ + spin_lock(lock); + /* Makes sure hashdance_schedule() has completed */ + spin_unlock(lock); + + if (timer_shutdown_sync(&tw->tw_timer)) inet_twsk_kill(tw); inet_twsk_put(tw); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3613e08ca794..e61c7c974745 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -157,7 +157,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), ts_recent_stamp)))) { - /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk * and releasing the bucket lock. */ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4c894e540730..fc9a850ed9bd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -350,11 +350,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * we complete the initialization. */ local_bh_disable(); - inet_twsk_schedule(tw, timeo); /* Linkage updates. * Note that access to tw after this point is illegal. */ - inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo); + inet_twsk_hashdance_schedule(tw, sk, net->ipv4.tcp_death_row.hashinfo, timeo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this -- cgit v1.2.3 From f81d0dd2fde35fd1acc30b3f4de6aaf57d514551 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jun 2024 17:11:39 +0200 Subject: tcp: move inet_twsk_schedule helper out of header Its no longer used outside inet_timewait_sock.c, so move it there. Reviewed-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 5 ----- net/ipv4/inet_timewait_sock.c | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b43d220243d..f88b68269012 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -101,11 +101,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); -static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) -{ - __inet_twsk_schedule(tw, timeo, false); -} - static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, true); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index b2d97c816c99..337390ba85b4 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -92,6 +92,11 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, hlist_nulls_add_head_rcu(&tw->tw_node, list); } +static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + /* * Enter the time wait state. * Essentially we whip up a timewait bucket, copy the relevant info into it -- cgit v1.2.3 From 5fbf57a937f418fe204f9dbb7735e91984f4ee6a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 6 Jun 2024 12:29:06 -0700 Subject: net: netlink: remove the cb_mutex "injection" from netlink core Back in 2007, in commit af65bdfce98d ("[NETLINK]: Switch cb_lock spinlock to mutex and allow to override it") netlink core was extended to allow subsystems to replace the dump mutex lock with its own lock. The mechanism was used by rtnetlink to take rtnl_lock but it isn't sufficiently flexible for other users. Over the 17 years since it was added no other user appeared. Since rtnetlink needs conditional locking now, and doesn't use it either, axe this feature complete. Signed-off-by: Jakub Kicinski Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 - net/netlink/af_netlink.c | 18 +++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 5df7340d4dab..b332c2048c75 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -47,7 +47,6 @@ struct netlink_kernel_cfg { unsigned int groups; unsigned int flags; void (*input)(struct sk_buff *skb); - struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); void (*release) (struct sock *sk, unsigned long *groups); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8bbbe75e75db..0b7a89db3ab7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -636,8 +636,7 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *dump_cb_mutex, int protocol, - int kern) + int protocol, int kern) { struct sock *sk; struct netlink_sock *nlk; @@ -655,7 +654,6 @@ static int __netlink_create(struct net *net, struct socket *sock, lockdep_set_class_and_name(&nlk->nl_cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]); - nlk->dump_cb_mutex = dump_cb_mutex; init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -667,7 +665,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, int kern) { struct module *module = NULL; - struct mutex *cb_mutex; struct netlink_sock *nlk; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); @@ -696,7 +693,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, module = nl_table[protocol].module; else err = -EPROTONOSUPPORT; - cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; unbind = nl_table[protocol].unbind; release = nl_table[protocol].release; @@ -705,7 +701,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol, kern); + err = __netlink_create(net, sock, protocol, kern); if (err < 0) goto out_module; @@ -2016,7 +2012,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, struct sock *sk; struct netlink_sock *nlk; struct listeners *listeners = NULL; - struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL; unsigned int groups; BUG_ON(!nl_table); @@ -2027,7 +2022,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) + if (__netlink_create(net, sock, unit, 1) < 0) goto out_sock_release_nosk; sk = sock->sk; @@ -2055,7 +2050,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (!nl_table[unit].registered) { nl_table[unit].groups = groups; rcu_assign_pointer(nl_table[unit].listeners, listeners); - nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; @@ -2326,15 +2320,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken) netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { - struct mutex *extra_mutex = nlk->dump_cb_mutex; - cb->extack = &extack; - if (extra_mutex) - mutex_lock(extra_mutex); nlk->dump_done_errno = cb->dump(skb, cb); - if (extra_mutex) - mutex_unlock(extra_mutex); /* EMSGSIZE plus something already in the skb means * that there's more to dump but current skb has filled up. -- cgit v1.2.3 From 1d4ce389da2b84e0e24aad5e83fe9c742adc3f99 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 7 Jun 2024 14:22:33 -0700 Subject: ice: add and use roundup_u64 instead of open coding equivalent In ice_ptp_cfg_clkout(), the ice driver needs to calculate the nearest next second of a current time value specified in nanoseconds. It implements this using div64_u64, because the time value is a u64. It could use div_u64 since NSEC_PER_SEC is smaller than 32-bits. Ideally this would be implemented directly with roundup(), but that can't work on all platforms due to a division which requires using the specific macros and functions due to platform restrictions, and to ensure that the most appropriate and fast instructions are used. The kernel doesn't currently provide any 64-bit equivalents for doing roundup. Attempting to use roundup() on a 32-bit platform will result in a link failure due to not having a direct 64-bit division. The closest equivalent for this is DIV64_U64_ROUND_UP, which does a division always rounding up. However, this only computes the division, and forces use of the div64_u64 in cases where the divisor is a 32bit value and could make use of div_u64. Introduce DIV_U64_ROUND_UP based on div_u64, and then use it to implement roundup_u64 which takes a u64 input value and a u32 rounding value. The name roundup_u64 matches the naming scheme of div_u64, and future patches could implement roundup64_u64 if they need to round by a multiple that is greater than 32-bits. Replace the logic in ice_ptp.c which does this equivalent with the newly added roundup_u64. Tested-by: Pucha Himasekhar Reddy Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20240607-next-2024-06-03-intel-next-batch-v3-2-d1470cee3347@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ptp.c | 3 +-- include/linux/math64.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index adbb9cffe20c..b7ab6fdf710d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1773,8 +1773,7 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, * maintaining phase */ if (start_time < current_time) - start_time = div64_u64(current_time + NSEC_PER_SEC - 1, - NSEC_PER_SEC) * NSEC_PER_SEC + phase; + start_time = roundup_u64(current_time, NSEC_PER_SEC) + phase; if (ice_is_e810(hw)) start_time -= E810_OUT_PROP_DELAY_NS; diff --git a/include/linux/math64.h b/include/linux/math64.h index d34def7f9a8c..6aaccc1626ab 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -297,6 +297,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) +/** + * DIV_U64_ROUND_UP - unsigned 64bit divide with 32bit divisor rounded up + * @ll: unsigned 64bit dividend + * @d: unsigned 32bit divisor + * + * Divide unsigned 64bit dividend by unsigned 32bit divisor + * and round up. + * + * Return: dividend / divisor rounded up + */ +#define DIV_U64_ROUND_UP(ll, d) \ + ({ u32 _tmp = (d); div_u64((ll) + _tmp - 1, _tmp); }) + /** * DIV64_U64_ROUND_CLOSEST - unsigned 64bit divide with 64bit divisor rounded to nearest integer * @dividend: unsigned 64bit dividend @@ -342,4 +355,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); div_s64((__x - (__d / 2)), __d); \ } \ ) + +/** + * roundup_u64 - Round up a 64bit value to the next specified 32bit multiple + * @x: the value to up + * @y: 32bit multiple to round up to + * + * Rounds @x to the next multiple of @y. For 32bit @x values, see roundup and + * the faster round_up() for powers of 2. + * + * Return: rounded up value. + */ +static inline u64 roundup_u64(u64 x, u32 y) +{ + return DIV_U64_ROUND_UP(x, y) * y; +} #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From fa59dc2f6fc616fde3941f62ccd4adcaa21ccd47 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 7 Jun 2024 18:25:24 +0800 Subject: net: core,vrf: Change pcpu_dstat fields to u64_stats_t The pcpu_sw_netstats and pcpu_lstats structs both contain a set of u64_stats_t fields for individual stats, but pcpu_dstats uses u64s instead. Make this consistent by using u64_stats_t across all stats types. The per-cpu dstats are only used by the vrf driver at present, so update that driver as part of this change. Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607-dstats-v3-1-cc781fe116f7@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 38 ++++++++++++++++++++++---------------- include/linux/netdevice.h | 12 ++++++------ 2 files changed, 28 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3a252ac5dd28..5018831b2a79 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -126,8 +126,8 @@ static void vrf_rx_stats(struct net_device *dev, int len) struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->rx_packets++; - dstats->rx_bytes += len; + u64_stats_inc(&dstats->rx_packets); + u64_stats_add(&dstats->rx_bytes, len); u64_stats_update_end(&dstats->syncp); } @@ -150,11 +150,11 @@ static void vrf_get_stats64(struct net_device *dev, dstats = per_cpu_ptr(dev->dstats, i); do { start = u64_stats_fetch_begin(&dstats->syncp); - tbytes = dstats->tx_bytes; - tpkts = dstats->tx_packets; - tdrops = dstats->tx_drops; - rbytes = dstats->rx_bytes; - rpkts = dstats->rx_packets; + tbytes = u64_stats_read(&dstats->tx_bytes); + tpkts = u64_stats_read(&dstats->tx_packets); + tdrops = u64_stats_read(&dstats->tx_drops); + rbytes = u64_stats_read(&dstats->rx_bytes); + rpkts = u64_stats_read(&dstats->rx_packets); } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; @@ -408,10 +408,15 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb->protocol = eth_type_trans(skb, dev); - if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { vrf_rx_stats(dev, len); - else - this_cpu_inc(dev->dstats->rx_drops); + } else { + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_drops); + u64_stats_update_end(&dstats->syncp); + } return NETDEV_TX_OK; } @@ -599,19 +604,20 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); + u64_stats_update_begin(&dstats->syncp); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - u64_stats_update_begin(&dstats->syncp); - dstats->tx_packets++; - dstats->tx_bytes += len; - u64_stats_update_end(&dstats->syncp); + u64_stats_inc(&dstats->tx_packets); + u64_stats_add(&dstats->tx_bytes, len); } else { - this_cpu_inc(dev->dstats->tx_drops); + u64_stats_inc(&dstats->tx_drops); } + u64_stats_update_end(&dstats->syncp); return ret; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d20c6c99eb88..f148a01dd1d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2731,12 +2731,12 @@ struct pcpu_sw_netstats { } __aligned(4 * sizeof(u64)); struct pcpu_dstats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_drops; - u64 tx_packets; - u64 tx_bytes; - u64 tx_drops; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_drops; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); -- cgit v1.2.3 From 3966a668bfeef49e265e4975a2cdc55fb931036d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:55 +0100 Subject: net/tcp: Use static_branch_tcp_{md5,ao} to drop ifdefs It's possible to clean-up some ifdefs by hiding that tcp_{md5,ao}_needed static branch is defined and compiled only under related configs, since commit 4c8530dc7d7d ("net/tcp: Only produce AO/MD5 logs if there are any keys"). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp.h | 14 ++++---------- net/ipv4/tcp_ipv4.c | 8 ++------ 2 files changed, 6 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a70fc39090fe..e5427b05129b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2386,21 +2386,15 @@ static inline void tcp_get_current_key(const struct sock *sk, static inline bool tcp_key_is_md5(const struct tcp_key *key) { -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - key->type == TCP_KEY_MD5) - return true; -#endif + if (static_branch_tcp_md5()) + return key->type == TCP_KEY_MD5; return false; } static inline bool tcp_key_is_ao(const struct tcp_key *key) { -#ifdef CONFIG_TCP_AO - if (static_branch_unlikely(&tcp_ao_needed.key) && - key->type == TCP_KEY_AO) - return true; -#endif + if (static_branch_tcp_ao()) + return key->type == TCP_KEY_AO; return false; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e61c7c974745..de0c8f43448a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1054,12 +1054,10 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) #else if (0) { #endif -#ifdef CONFIG_TCP_MD5SIG - } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_tcp_md5()) { key.md5_key = tcp_twsk_md5_key(tcptw); if (key.md5_key) key.type = TCP_KEY_MD5; -#endif } tcp_v4_send_ack(sk, skb, @@ -1128,8 +1126,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, #else if (0) { #endif -#ifdef CONFIG_TCP_MD5SIG - } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_tcp_md5()) { const union tcp_md5_addr *addr; int l3index; @@ -1138,7 +1135,6 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); if (key.md5_key) key.type = TCP_KEY_MD5; -#endif } tcp_v4_send_ack(sk, skb, seq, -- cgit v1.2.3 From 72863087f635367323693b9ab83c3107e0353c5f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:56 +0100 Subject: net/tcp: Add a helper tcp_ao_hdr_maclen() It's going to be used more in TCP-AO tracepoints. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 5 +++++ net/ipv4/tcp_ao.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 5d8e9ed2c005..198e02004ad2 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -19,6 +19,11 @@ struct tcp_ao_hdr { u8 rnext_keyid; }; +static inline u8 tcp_ao_hdr_maclen(const struct tcp_ao_hdr *aoh) +{ + return aoh->length - sizeof(struct tcp_ao_hdr); +} + struct tcp_ao_counters { atomic64_t pkt_good; atomic64_t pkt_bad; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 37c42b63ff99..50ae43c92829 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -884,8 +884,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key, u8 *traffic_key, u8 *phash, u32 sne, int l3index) { - u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr); const struct tcphdr *th = tcp_hdr(skb); + u8 maclen = tcp_ao_hdr_maclen(aoh); void *hash_buf = NULL; if (maclen != tcp_ao_maclen(key)) { -- cgit v1.2.3 From 811efc06e5f30a57030451b2d1998aa81273baf8 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:57 +0100 Subject: net/tcp: Move tcp_inbound_hash() from headers Two reasons: 1. It's grown up enough 2. In order to not do header spaghetti by including , which is necessary for TCP tracepoints. While at it, unexport and make static tcp_inbound_ao_hash(). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp.h | 78 +++---------------------------------------------------- net/ipv4/tcp.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e5427b05129b..2aac11e7e1cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1863,12 +1863,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return __tcp_md5_do_lookup(sk, 0, addr, family, true); } -enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location); - - #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key * @@ -1885,13 +1879,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return NULL; } -static inline enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location) -{ - return SKB_NOT_DROPPED_YET; -} #define tcp_twsk_md5_key(twsk) NULL #endif @@ -2806,66 +2793,9 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, return false; } -/* Called with rcu_read_lock() */ -static inline enum skb_drop_reason -tcp_inbound_hash(struct sock *sk, const struct request_sock *req, - const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int dif, int sdif) -{ - const struct tcphdr *th = tcp_hdr(skb); - const struct tcp_ao_hdr *aoh; - const __u8 *md5_location; - int l3index; - - /* Invalid option or two times meet any of auth options */ - if (tcp_parse_auth_options(th, &md5_location, &aoh)) { - tcp_hash_fail("TCP segment has incorrect auth options set", - family, skb, ""); - return SKB_DROP_REASON_TCP_AUTH_HDR; - } - - if (req) { - if (tcp_rsk_used_ao(req) != !!aoh) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); - return SKB_DROP_REASON_TCP_AOFAILURE; - } - } - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - /* Fast path: unsigned segments */ - if (likely(!md5_location && !aoh)) { - /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid - * for the remote peer. On TCP-AO established connection - * the last key is impossible to remove, so there's - * always at least one current_key. - */ - if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_AONOTFOUND; - } - if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } - return SKB_NOT_DROPPED_YET; - } - - if (aoh) - return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); - - return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, - l3index, md5_location); -} +enum skb_drop_reason tcp_inbound_hash(struct sock *sk, + const struct request_sock *req, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6553221694ec..17a4a8e4855d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4463,7 +4463,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp, EXPORT_SYMBOL(tcp_md5_hash_key); /* Called with rcu_read_lock() */ -enum skb_drop_reason +static enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, int family, int l3index, const __u8 *hash_location) @@ -4517,10 +4517,80 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, } return SKB_NOT_DROPPED_YET; } -EXPORT_SYMBOL(tcp_inbound_md5_hash); +#else +static inline enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int l3index, const __u8 *hash_location) +{ + return SKB_NOT_DROPPED_YET; +} #endif +/* Called with rcu_read_lock() */ +enum skb_drop_reason +tcp_inbound_hash(struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + const __u8 *md5_location; + int l3index; + + /* Invalid option or two times meet any of auth options */ + if (tcp_parse_auth_options(th, &md5_location, &aoh)) { + tcp_hash_fail("TCP segment has incorrect auth options set", + family, skb, ""); + return SKB_DROP_REASON_TCP_AUTH_HDR; + } + + if (req) { + if (tcp_rsk_used_ao(req) != !!aoh) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + tcp_hash_fail("TCP connection can't start/end using TCP-AO", + family, skb, "%s", + !aoh ? "missing AO" : "AO signed"); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + } + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + /* Fast path: unsigned segments */ + if (likely(!md5_location && !aoh)) { + /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid + * for the remote peer. On TCP-AO established connection + * the last key is impossible to remove, so there's + * always at least one current_key. + */ + if (tcp_ao_required(sk, saddr, family, l3index, true)) { + tcp_hash_fail("AO hash is required, but not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_AONOTFOUND; + } + if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + tcp_hash_fail("MD5 Hash not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_MD5NOTFOUND; + } + return SKB_NOT_DROPPED_YET; + } + + if (aoh) + return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); + + return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, + l3index, md5_location); +} +EXPORT_SYMBOL_GPL(tcp_inbound_hash); + void tcp_done(struct sock *sk) { struct request_sock *req; -- cgit v1.2.3 From 96be3dcd013df6aa79acf32e739c0a35b89a4f50 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:58 +0100 Subject: net/tcp: Add tcp-md5 and tcp-ao tracepoints Instead of forcing userspace to parse dmesg (that's what currently is happening, at least in codebase of my current company), provide a better way, that can be enabled/disabled in runtime. Currently, there are already tcp events, add hashing related ones there, too. Rasdaemon currently exercises net_dev_xmit_timeout, devlink_health_report, but it'll be trivial to teach it to deal with failed hashes. Otherwise, BGP may trace/log them itself. Especially exciting for possible investigations is key rotation (RNext_key requests). Suggested-by: Jakub Kicinski Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 317 +++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 17 +++ net/ipv4/tcp_ao.c | 13 ++ net/ipv4/tcp_input.c | 8 +- net/ipv4/tcp_output.c | 2 + 5 files changed, 355 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 49b5ee091cf6..1c8bd8e186b8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -411,6 +411,323 @@ TRACE_EVENT(tcp_cong_state_set, __entry->cong_state) ); +DECLARE_EVENT_CLASS(tcp_hash_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + + TP_ARGS(sk, skb), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' : ' ') +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DECLARE_EVENT_CLASS(tcp_ao_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + + TP_ARGS(sk, skb, keyid, rnext, maclen), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + + __field(__u8, keyid) + __field(__u8, rnext) + __field(__u8, maclen) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + + __entry->keyid = keyid; + __entry->rnext = rnext; + __entry->maclen = maclen; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' : ' ', + __entry->keyid, __entry->rnext, __entry->maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sk, + + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + + TP_ARGS(sk, keyid, rnext), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u8, keyid) + __field(__u8, rnext) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->keyid = keyid; + __entry->rnext = rnext; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->keyid, __entry->rnext) +); + +DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + TP_ARGS(sk, keyid, rnext) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sne, + + TP_PROTO(const struct sock *sk, __u32 new_sne), + + TP_ARGS(sk, new_sne), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u32, new_sne) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->new_sne = new_sne; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17a4a8e4855d..73152ce1367e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,6 +282,7 @@ #include #include #include +#include #include /* Track pending CMSGs. */ @@ -4484,6 +4485,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); + trace_tcp_hash_md5_unexpected(sk, skb); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4513,6 +4515,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, l3index); } } + trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; } return SKB_NOT_DROPPED_YET; @@ -4544,15 +4547,27 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, if (tcp_parse_auth_options(th, &md5_location, &aoh)) { tcp_hash_fail("TCP segment has incorrect auth options set", family, skb, ""); + trace_tcp_hash_bad_header(sk, skb); return SKB_DROP_REASON_TCP_AUTH_HDR; } if (req) { if (tcp_rsk_used_ao(req) != !!aoh) { + u8 keyid, rnext, maclen; + + if (aoh) { + keyid = aoh->keyid; + rnext = aoh->rnext_keyid; + maclen = tcp_ao_hdr_maclen(aoh); + } else { + keyid = rnext = maclen = 0; + } + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); tcp_hash_fail("TCP connection can't start/end using TCP-AO", family, skb, "%s", !aoh ? "missing AO" : "AO signed"); + trace_tcp_ao_handshake_failure(sk, skb, keyid, rnext, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } } @@ -4572,12 +4587,14 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, if (tcp_ao_required(sk, saddr, family, l3index, true)) { tcp_hash_fail("AO hash is required, but not found", family, skb, "L3 index %d", l3index); + trace_tcp_hash_ao_required(sk, skb); return SKB_DROP_REASON_TCP_AONOTFOUND; } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); tcp_hash_fail("MD5 Hash not found", family, skb, "L3 index %d", l3index); + trace_tcp_hash_md5_required(sk, skb); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } return SKB_NOT_DROPPED_YET; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 50ae43c92829..1e5087c6cd7d 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -16,6 +16,7 @@ #include #include #include +#include DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); @@ -895,6 +896,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, tcp_hash_fail("AO hash wrong length", family, skb, "%u != %d L3index: %d", maclen, tcp_ao_maclen(key), l3index); + trace_tcp_ao_wrong_maclen(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -911,6 +914,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, atomic64_inc(&key->pkt_bad); tcp_hash_fail("AO hash mismatch", family, skb, "L3index: %d", l3index); + trace_tcp_ao_mismatch(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); kfree(hash_buf); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -927,6 +932,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, int l3index, const struct tcp_ao_hdr *aoh) { const struct tcphdr *th = tcp_hdr(skb); + u8 maclen = tcp_ao_hdr_maclen(aoh); u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */ struct tcp_ao_info *info; enum skb_drop_reason ret; @@ -941,6 +947,8 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); tcp_hash_fail("AO key not found", family, skb, "keyid: %u L3index: %d", aoh->keyid, l3index); + trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOUNEXPECTED; } @@ -981,6 +989,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, current_key = READ_ONCE(info->current_key); /* Key rotation: the peer asks us to use new key (RNext) */ if (unlikely(aoh->rnext_keyid != current_key->sndid)) { + trace_tcp_ao_rnext_request(sk, skb, current_key->sndid, + aoh->rnext_keyid, + tcp_ao_hdr_maclen(aoh)); /* If the key is not found we do nothing. */ key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); if (key) @@ -1048,6 +1059,8 @@ key_not_found: atomic64_inc(&info->counters.key_not_found); tcp_hash_fail("Requested by the peer AO key id not found", family, skb, "L3index: %d", l3index); + trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb187450e4d7..d0a1e34d69f6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3578,8 +3578,10 @@ static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && ack < tp->snd_una) + if (ao && ack < tp->snd_una) { ao->snd_sne++; + trace_tcp_ao_snd_sne_update((struct sock *)tp, ao->snd_sne); + } #endif } @@ -3604,8 +3606,10 @@ static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && seq < tp->rcv_nxt) + if (ao && seq < tp->rcv_nxt) { ao->rcv_sne++; + trace_tcp_ao_rcv_sne_update((struct sock *)tp, ao->rcv_sne); + } #endif } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 090fb0c24599..16c48df8df4c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3768,6 +3768,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_AO struct tcp_ao_key *ao_key = NULL; u8 keyid = tcp_rsk(req)->ao_keyid; + u8 rnext = tcp_rsk(req)->ao_rcv_next; ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), keyid, -1); @@ -3777,6 +3778,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. */ if (unlikely(!ao_key)) { + trace_tcp_ao_synack_no_key(sk, keyid, rnext); rcu_read_unlock(); kfree_skb(skb); net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n", -- cgit v1.2.3 From 78b1b27db91c7a94297a8b6a665fe7e86dfc5750 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:59 +0100 Subject: net/tcp: Remove tcp_hash_fail() Now there are tracepoints, that cover all functionality of tcp_hash_fail(), but also wire up missing places They are also faster, can be disabled and provide filtering. This potentially may create a regression if a userspace depends on dmesg logs. Fingers crossed, let's see if anyone complains in reality. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 37 ------------------------------------- net/ipv4/tcp.c | 25 ------------------------- net/ipv4/tcp_ao.c | 9 --------- 3 files changed, 71 deletions(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 198e02004ad2..1d46460d0fef 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -149,43 +149,6 @@ extern struct static_key_false_deferred tcp_ao_needed; #define static_branch_tcp_ao() false #endif -static inline bool tcp_hash_should_produce_warnings(void) -{ - return static_branch_tcp_md5() || static_branch_tcp_ao(); -} - -#define tcp_hash_fail(msg, family, skb, fmt, ...) \ -do { \ - const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[6]; \ - char *f = hdr_flags; \ - \ - if (!tcp_hash_should_produce_warnings()) \ - break; \ - if (th->fin) \ - *f++ = 'F'; \ - if (th->syn) \ - *f++ = 'S'; \ - if (th->rst) \ - *f++ = 'R'; \ - if (th->psh) \ - *f++ = 'P'; \ - if (th->ack) \ - *f++ = '.'; \ - *f = 0; \ - if ((family) == AF_INET) { \ - net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ - msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ - &ip_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } else { \ - net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ - msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ - &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } \ -} while (0) - #ifdef CONFIG_TCP_AO /* TCP-AO structures and functions */ struct tcp4_ao_context { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 73152ce1367e..e03a342c9162 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4484,7 +4484,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); trace_tcp_hash_md5_unexpected(sk, skb); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4500,21 +4499,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); - if (family == AF_INET) { - tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d", - genhash ? "tcp_v4_calc_md5_hash failed" - : "", l3index); - } else { - if (genhash) { - tcp_hash_fail("MD5 Hash failed", - AF_INET6, skb, "L3 index %d", - l3index); - } else { - tcp_hash_fail("MD5 Hash mismatch", - AF_INET6, skb, "L3 index %d", - l3index); - } - } trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; } @@ -4545,8 +4529,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, /* Invalid option or two times meet any of auth options */ if (tcp_parse_auth_options(th, &md5_location, &aoh)) { - tcp_hash_fail("TCP segment has incorrect auth options set", - family, skb, ""); trace_tcp_hash_bad_header(sk, skb); return SKB_DROP_REASON_TCP_AUTH_HDR; } @@ -4564,9 +4546,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, } NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); trace_tcp_ao_handshake_failure(sk, skb, keyid, rnext, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -4585,15 +4564,11 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, * always at least one current_key. */ if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); trace_tcp_hash_ao_required(sk, skb); return SKB_DROP_REASON_TCP_AONOTFOUND; } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); trace_tcp_hash_md5_required(sk, skb); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 1e5087c6cd7d..0de863aa5f66 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -893,9 +893,6 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); - tcp_hash_fail("AO hash wrong length", family, skb, - "%u != %d L3index: %d", maclen, - tcp_ao_maclen(key), l3index); trace_tcp_ao_wrong_maclen(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; @@ -912,8 +909,6 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); - tcp_hash_fail("AO hash mismatch", family, skb, - "L3index: %d", l3index); trace_tcp_ao_mismatch(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); kfree(hash_buf); @@ -945,8 +940,6 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, info = rcu_dereference(tcp_sk(sk)->ao_info); if (!info) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); - tcp_hash_fail("AO key not found", family, skb, - "keyid: %u L3index: %d", aoh->keyid, l3index); trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOUNEXPECTED; @@ -1057,8 +1050,6 @@ verify_hash: key_not_found: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); atomic64_inc(&info->counters.key_not_found); - tcp_hash_fail("Requested by the peer AO key id not found", - family, skb, "L3index: %d", l3index); trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; -- cgit v1.2.3 From 8682ad36870790ad6a9a03ac237c8d87d34b26d5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:43 +0200 Subject: wifi: cfg80211: use BIT() for flag enums Use BIT(x) instead of 1< Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.c21598fbf49c.Ib8f26c5e9f508aee19fdfa1fd4b5995f084c46d4@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 102 ++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5da9bb0ac6a4..051d4eb227bf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -127,31 +127,31 @@ struct wiphy; * even if it is otherwise disabled. */ enum ieee80211_channel_flags { - IEEE80211_CHAN_DISABLED = 1<<0, - IEEE80211_CHAN_NO_IR = 1<<1, - IEEE80211_CHAN_PSD = 1<<2, - IEEE80211_CHAN_RADAR = 1<<3, - IEEE80211_CHAN_NO_HT40PLUS = 1<<4, - IEEE80211_CHAN_NO_HT40MINUS = 1<<5, - IEEE80211_CHAN_NO_OFDM = 1<<6, - IEEE80211_CHAN_NO_80MHZ = 1<<7, - IEEE80211_CHAN_NO_160MHZ = 1<<8, - IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_IR_CONCURRENT = 1<<10, - IEEE80211_CHAN_NO_20MHZ = 1<<11, - IEEE80211_CHAN_NO_10MHZ = 1<<12, - IEEE80211_CHAN_NO_HE = 1<<13, - IEEE80211_CHAN_1MHZ = 1<<14, - IEEE80211_CHAN_2MHZ = 1<<15, - IEEE80211_CHAN_4MHZ = 1<<16, - IEEE80211_CHAN_8MHZ = 1<<17, - IEEE80211_CHAN_16MHZ = 1<<18, - IEEE80211_CHAN_NO_320MHZ = 1<<19, - IEEE80211_CHAN_NO_EHT = 1<<20, - IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, - IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, - IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, - IEEE80211_CHAN_CAN_MONITOR = 1<<24, + IEEE80211_CHAN_DISABLED = BIT(0), + IEEE80211_CHAN_NO_IR = BIT(1), + IEEE80211_CHAN_PSD = BIT(2), + IEEE80211_CHAN_RADAR = BIT(3), + IEEE80211_CHAN_NO_HT40PLUS = BIT(4), + IEEE80211_CHAN_NO_HT40MINUS = BIT(5), + IEEE80211_CHAN_NO_OFDM = BIT(6), + IEEE80211_CHAN_NO_80MHZ = BIT(7), + IEEE80211_CHAN_NO_160MHZ = BIT(8), + IEEE80211_CHAN_INDOOR_ONLY = BIT(9), + IEEE80211_CHAN_IR_CONCURRENT = BIT(10), + IEEE80211_CHAN_NO_20MHZ = BIT(11), + IEEE80211_CHAN_NO_10MHZ = BIT(12), + IEEE80211_CHAN_NO_HE = BIT(13), + IEEE80211_CHAN_1MHZ = BIT(14), + IEEE80211_CHAN_2MHZ = BIT(15), + IEEE80211_CHAN_4MHZ = BIT(16), + IEEE80211_CHAN_8MHZ = BIT(17), + IEEE80211_CHAN_16MHZ = BIT(18), + IEEE80211_CHAN_NO_320MHZ = BIT(19), + IEEE80211_CHAN_NO_EHT = BIT(20), + IEEE80211_CHAN_DFS_CONCURRENT = BIT(21), + IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22), + IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), + IEEE80211_CHAN_CAN_MONITOR = BIT(24), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -229,13 +229,13 @@ struct ieee80211_channel { * @IEEE80211_RATE_SUPPORTS_10MHZ: Rate can be used in 10 MHz mode */ enum ieee80211_rate_flags { - IEEE80211_RATE_SHORT_PREAMBLE = 1<<0, - IEEE80211_RATE_MANDATORY_A = 1<<1, - IEEE80211_RATE_MANDATORY_B = 1<<2, - IEEE80211_RATE_MANDATORY_G = 1<<3, - IEEE80211_RATE_ERP_G = 1<<4, - IEEE80211_RATE_SUPPORTS_5MHZ = 1<<5, - IEEE80211_RATE_SUPPORTS_10MHZ = 1<<6, + IEEE80211_RATE_SHORT_PREAMBLE = BIT(0), + IEEE80211_RATE_MANDATORY_A = BIT(1), + IEEE80211_RATE_MANDATORY_B = BIT(2), + IEEE80211_RATE_MANDATORY_G = BIT(3), + IEEE80211_RATE_ERP_G = BIT(4), + IEEE80211_RATE_SUPPORTS_5MHZ = BIT(5), + IEEE80211_RATE_SUPPORTS_10MHZ = BIT(6), }; /** @@ -1957,9 +1957,9 @@ struct rate_info { * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled */ enum bss_param_flags { - BSS_PARAM_FLAGS_CTS_PROT = 1<<0, - BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1, - BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2, + BSS_PARAM_FLAGS_CTS_PROT = BIT(0), + BSS_PARAM_FLAGS_SHORT_PREAMBLE = BIT(1), + BSS_PARAM_FLAGS_SHORT_SLOT_TIME = BIT(2), }; /** @@ -2266,13 +2266,13 @@ static inline int cfg80211_get_station(struct net_device *dev, * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { - MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID, - MONITOR_FLAG_FCSFAIL = 1< Date: Thu, 23 May 2024 12:09:44 +0200 Subject: wifi: ieee80211: remove unused enum ieee80211_client_reg_power This has never been used, and it's really not directly representing the spec, so shouldn't have been here in the first place. Remove it. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.32ed8fc1522d.Id4480d162e1921478e33d145890dc16c263b57bf@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 769008a51809..456a55bd6c6b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2426,24 +2426,6 @@ enum ieee80211_ap_reg_power { IEEE80211_REG_AP_POWER_AFTER_LAST - 1, }; -/** - * enum ieee80211_client_reg_power - regulatory power for a client - * - * @IEEE80211_REG_UNSET_CLIENT: Client has no regulatory power mode - * @IEEE80211_REG_DEFAULT_CLIENT: Default Client - * @IEEE80211_REG_SUBORDINATE_CLIENT: Subordinate Client - * @IEEE80211_REG_CLIENT_POWER_AFTER_LAST: internal - * @IEEE80211_REG_CLIENT_POWER_MAX: maximum value - */ -enum ieee80211_client_reg_power { - IEEE80211_REG_UNSET_CLIENT, - IEEE80211_REG_DEFAULT_CLIENT, - IEEE80211_REG_SUBORDINATE_CLIENT, - IEEE80211_REG_CLIENT_POWER_AFTER_LAST, - IEEE80211_REG_CLIENT_POWER_MAX = - IEEE80211_REG_CLIENT_POWER_AFTER_LAST - 1, -}; - /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 -- cgit v1.2.3 From 0a9314ad5f45aeb10326dce33c5d70b85f74ef2d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:45 +0200 Subject: wifi: cfg80211: move enum ieee80211_ap_reg_power to cfg80211 This really shouldn't have been in ieee80211.h, since it doesn't directly represent the spec. Move it to cfg80211 rather than mac80211 since upcoming changes will use it there. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.962b16c831cd.I5745962525b1b176c5b90d37b3720fc100eee406@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 -------------------- include/net/cfg80211.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 456a55bd6c6b..30cef3b940eb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2406,26 +2406,6 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, int mcs, bool ext_nss_bw_capable, unsigned int max_vht_nss); -/** - * enum ieee80211_ap_reg_power - regulatory power for a Access Point - * - * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode - * @IEEE80211_REG_LPI_AP: Indoor Access Point - * @IEEE80211_REG_SP_AP: Standard power Access Point - * @IEEE80211_REG_VLP_AP: Very low power Access Point - * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal - * @IEEE80211_REG_AP_POWER_MAX: maximum value - */ -enum ieee80211_ap_reg_power { - IEEE80211_REG_UNSET_AP, - IEEE80211_REG_LPI_AP, - IEEE80211_REG_SP_AP, - IEEE80211_REG_VLP_AP, - IEEE80211_REG_AP_POWER_AFTER_LAST, - IEEE80211_REG_AP_POWER_MAX = - IEEE80211_REG_AP_POWER_AFTER_LAST - 1, -}; - /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 051d4eb227bf..fb7d76513e1c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6081,6 +6081,21 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, void wiphy_delayed_work_flush(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +/** + * enum ieee80211_ap_reg_power - regulatory power for an Access Point + * + * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode + * @IEEE80211_REG_LPI_AP: Indoor Access Point + * @IEEE80211_REG_SP_AP: Standard power Access Point + * @IEEE80211_REG_VLP_AP: Very low power Access Point + */ +enum ieee80211_ap_reg_power { + IEEE80211_REG_UNSET_AP, + IEEE80211_REG_LPI_AP, + IEEE80211_REG_SP_AP, + IEEE80211_REG_VLP_AP, +}; + /** * struct wireless_dev - wireless device state * -- cgit v1.2.3 From 9fd171a71b9d4cd8855891c0ba7e2a152139b41a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:48 +0200 Subject: wifi: cfg80211: refactor regulatory beaconing checking There are two functions exported now, with different settings, refactor to just export a single function that take a struct with different settings. This will make it easier to add more parameters. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.d44c34dadfc2.I59b4403108e0dbf7fc6ae8f7522e1af520cffb1c@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 54 ++++++++++++++++++++++++++++++++++++++++++++------ net/wireless/chan.c | 30 +++++++++++----------------- 2 files changed, 60 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fb7d76513e1c..45ffeb110d36 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8800,6 +8800,31 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, sig_dbm); } +/** + * struct cfg80211_beaconing_check_config - beacon check configuration + * @iftype: the interface type to check for + * @relax: allow IR-relaxation conditions to apply (e.g. another + * interface connected already on the same channel) + * NOTE: If this is set, wiphy mutex must be held. + */ +struct cfg80211_beaconing_check_config { + enum nl80211_iftype iftype; + bool relax; +}; + +/** + * cfg80211_reg_check_beaconing - check if beaconing is allowed + * @wiphy: the wiphy + * @chandef: the channel definition + * @cfg: additional parameters for the checking + * + * Return: %true if there is no secondary channel or the secondary channel(s) + * can be used for beaconing (i.e. is not a radar channel etc.) + */ +bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + struct cfg80211_beaconing_check_config *cfg); + /** * cfg80211_reg_can_beacon - check if beaconing is allowed * @wiphy: the wiphy @@ -8809,9 +8834,17 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * Return: %true if there is no secondary channel or the secondary channel(s) * can be used for beaconing (i.e. is not a radar channel etc.) */ -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation @@ -8826,9 +8859,18 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * * Context: Requires the wiphy mutex to be held. */ -bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + .relax = true, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_ch_switch_notify - update wdev channel and notify userspace diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 360480604515..8b1796130b28 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1550,22 +1550,12 @@ static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, return res; } -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) -{ - return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true); -} -EXPORT_SYMBOL(cfg80211_reg_can_beacon); - -bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) +bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + struct cfg80211_beaconing_check_config *cfg) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - bool check_no_ir; - - lockdep_assert_held(&rdev->wiphy.mtx); + bool check_no_ir = true; /* * Under certain conditions suggested by some regulatory bodies a @@ -1573,12 +1563,16 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * only if such relaxations are not enabled and the conditions are not * met. */ - check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype, - chandef->chan); + if (cfg->relax) { + lockdep_assert_held(&rdev->wiphy.mtx); + check_no_ir = !cfg80211_ir_permissive_chan(wiphy, cfg->iftype, + chandef->chan); + } - return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); + return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, + check_no_ir); } -EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax); +EXPORT_SYMBOL(cfg80211_reg_check_beaconing); int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) -- cgit v1.2.3 From c1d8bd8d777d55f6708ca6e47c54dbe9f66f9bbb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:49 +0200 Subject: wifi: cfg80211: add regulatory flag to allow VLP AP operation Add a regulatory flag to allow VLP AP operation even on channels otherwise marked NO_IR, which may be possible in some regulatory domains/countries. Note that this requires checking also when the beacon is changed, since that may change the regulatory power type. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.63792ce19790.Ie2a02750d283b78fbf3c686b10565fb0388889e2@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/chan.c | 37 ++++++++++++++++++++++++++----------- net/wireless/nl80211.c | 27 +++++++++++++++++++++++++-- net/wireless/reg.c | 2 ++ net/wireless/trace.h | 15 +++++++++------ 6 files changed, 74 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45ffeb110d36..6f992aff74ae 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -125,6 +125,8 @@ struct wiphy; * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor * mode even in the presence of other (regulatory) restrictions, * even if it is otherwise disabled. + * @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation + * with very low power (VLP), even if otherwise set to NO_IR. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -152,6 +154,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22), IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), IEEE80211_CHAN_CAN_MONITOR = BIT(24), + IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -8806,9 +8809,12 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * @relax: allow IR-relaxation conditions to apply (e.g. another * interface connected already on the same channel) * NOTE: If this is set, wiphy mutex must be held. + * @reg_power: &enum ieee80211_ap_reg_power value indicating the + * advertised/used 6 GHz regulatory power setting */ struct cfg80211_beaconing_check_config { enum nl80211_iftype iftype; + enum ieee80211_ap_reg_power reg_power; bool relax; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f917bc6c9b6f..6ae3997061b6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4277,6 +4277,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor * mode despite other (regulatory) restrictions, even if the channel is * otherwise completely disabled. + * @NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP: This channel can be used for a + * very low power (VLP) AP, despite being NO_IR. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4320,6 +4322,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, NL80211_FREQUENCY_ATTR_CAN_MONITOR, + NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4529,6 +4532,8 @@ enum nl80211_sched_scan_match_attr { * Should be used together with %NL80211_RRF_DFS only. * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed + * @NL80211_RRF_ALLOW_6GHZ_VLP_AP: Very low power (VLP) AP can be permitted + * despite NO_IR configuration. */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4553,6 +4558,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_DFS_CONCURRENT = 1<<21, NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22, NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23, + NL80211_RRF_ALLOW_6GHZ_VLP_AP = 1<<24, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 8b1796130b28..bf2fdcd42019 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1523,28 +1523,38 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype, - bool check_no_ir) + u32 prohibited_flags, + u32 permitting_flags) { - bool res; - u32 prohibited_flags = IEEE80211_CHAN_DISABLED; + bool res, check_radar; int dfs_required; - trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, + prohibited_flags, + permitting_flags); - if (check_no_ir) - prohibited_flags |= IEEE80211_CHAN_NO_IR; + if (!_cfg80211_chandef_usable(wiphy, chandef, + IEEE80211_CHAN_DISABLED, 0)) + return false; dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype); - if (dfs_required != 0) - prohibited_flags |= IEEE80211_CHAN_RADAR; + check_radar = dfs_required != 0; if (dfs_required > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ - prohibited_flags = IEEE80211_CHAN_DISABLED; + prohibited_flags &= ~IEEE80211_CHAN_NO_IR; + check_radar = false; } - res = _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0); + if (check_radar && + !_cfg80211_chandef_usable(wiphy, chandef, + IEEE80211_CHAN_RADAR, 0)) + return false; + + res = _cfg80211_chandef_usable(wiphy, chandef, + prohibited_flags, + permitting_flags); trace_cfg80211_return_bool(res); return res; @@ -1555,6 +1565,7 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, struct cfg80211_beaconing_check_config *cfg) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + u32 permitting_flags = 0; bool check_no_ir = true; /* @@ -1569,8 +1580,12 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, chandef->chan); } + if (cfg->reg_power == IEEE80211_REG_VLP_AP) + permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; + return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, - check_no_ir); + check_no_ir ? IEEE80211_CHAN_NO_IR : 0, + permitting_flags); } EXPORT_SYMBOL(cfg80211_reg_check_beaconing); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4628280abf1d..a94e73c133f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1207,6 +1207,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_CAN_MONITOR) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_CAN_MONITOR)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -5954,6 +5957,7 @@ static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_beaconing_check_config beacon_check = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -6103,8 +6107,13 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms->chandef, - wdev->iftype)) { + beacon_check.iftype = wdev->iftype; + beacon_check.relax = true; + beacon_check.reg_power = + cfg80211_get_6ghz_power_type(params->beacon.tail, + params->beacon.tail_len); + if (!cfg80211_reg_check_beaconing(&rdev->wiphy, ¶ms->chandef, + &beacon_check)) { err = -EINVAL; goto out; } @@ -6261,6 +6270,7 @@ out: static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_beaconing_check_config beacon_check = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -6287,6 +6297,19 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (err) goto out; + /* recheck beaconing is permitted with possibly changed power type */ + beacon_check.iftype = wdev->iftype; + beacon_check.relax = true; + beacon_check.reg_power = + cfg80211_get_6ghz_power_type(params->beacon.tail, + params->beacon.tail_len); + if (!cfg80211_reg_check_beaconing(&rdev->wiphy, + &wdev->links[link_id].ap.chandef, + &beacon_check)) { + err = -EINVAL; + goto out; + } + attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY]; if (attr) { err = nl80211_parse_fils_discovery(rdev, attr, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3cef0021a3db..4a27f3823e25 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1600,6 +1600,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT; if (rd_flags & NL80211_RRF_PSD) channel_flags |= IEEE80211_CHAN_PSD; + if (rd_flags & NL80211_RRF_ALLOW_6GHZ_VLP_AP) + channel_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; return channel_flags; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 6ef9294747e3..5c26f065bd68 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3389,23 +3389,26 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, TRACE_EVENT(cfg80211_reg_can_beacon, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype, bool check_no_ir), - TP_ARGS(wiphy, chandef, iftype, check_no_ir), + enum nl80211_iftype iftype, u32 prohibited_flags, + u32 permitting_flags), + TP_ARGS(wiphy, chandef, iftype, prohibited_flags, permitting_flags), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY __field(enum nl80211_iftype, iftype) - __field(bool, check_no_ir) + __field(u32, prohibited_flags) + __field(u32, permitting_flags) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->iftype = iftype; - __entry->check_no_ir = check_no_ir; + __entry->prohibited_flags = prohibited_flags; + __entry->permitting_flags = permitting_flags; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s", + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d prohibited_flags=0x%x permitting_flags=0x%x", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype, - BOOL_TO_STR(__entry->check_no_ir)) + __entry->prohibited_flags, __entry->permitting_flags) ); TRACE_EVENT(cfg80211_chandef_dfs_required, -- cgit v1.2.3 From ec209ad86324de84ef66990f0e9df0851e45e054 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 12 Jun 2024 09:58:32 -0600 Subject: bpf: verifier: Relax caller requirements for kfunc projection type args Currently, if a kfunc accepts a projection type as an argument (eg struct __sk_buff *), the caller must exactly provide exactly the same type with provable provenance. However in practice, kfuncs that accept projection types _must_ cast to the underlying type before use b/c projection type layouts are completely made up. Thus, it is ok to relax the verifier rules around implicit conversions. We will use this functionality in the next commit when we align kfuncs to user-facing types. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/e2c025cb09ccfd4af1ec9e18284dc3cecff7514d.1718207789.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + kernel/bpf/btf.c | 13 ++++++++++--- kernel/bpf/verifier.c | 10 +++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9e56fd12a9f..56d91daacdba 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -531,6 +531,7 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); +bool btf_is_projection_of(const char *pname, const char *tname); bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7928d920056f..ce4707968217 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5820,6 +5820,15 @@ static int find_kern_ctx_type_id(enum bpf_prog_type prog_type) return ctx_type->type; } +bool btf_is_projection_of(const char *pname, const char *tname) +{ + if (strcmp(pname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) + return true; + if (strcmp(pname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + return true; + return false; +} + bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) @@ -5882,9 +5891,7 @@ again: * int socket_filter_bpf_prog(struct __sk_buff *skb) * { // no fields of skb are ever used } */ - if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) - return true; - if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + if (btf_is_projection_of(ctx_tname, tname)) return true; if (strcmp(ctx_tname, tname)) { /* bpf_user_pt_regs_t is a typedef, so resolve it to diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20ac9cfd54dd..dcac6119d810 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11265,6 +11265,8 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, bool strict_type_match = false; const struct btf *reg_btf; const char *reg_ref_tname; + bool taking_projection; + bool struct_same; u32 reg_ref_id; if (base_type(reg->type) == PTR_TO_BTF_ID) { @@ -11308,7 +11310,13 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); - if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) { + struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match); + /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot + * actually use it -- it must cast to the underlying type. So we allow + * caller to pass in the underlying type. + */ + taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname); + if (!taking_projection && !struct_same) { verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1, btf_type_str(reg_ref_t), reg_ref_tname); -- cgit v1.2.3 From cce4c40b960673f9e020835def310f1e89d3a940 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 12 Jun 2024 09:58:33 -0600 Subject: bpf: treewide: Align kfunc signatures to prog point-of-view Previously, kfunc declarations in bpf_kfuncs.h (and others) used "user facing" types for kfuncs prototypes while the actual kfunc definitions used "kernel facing" types. More specifically: bpf_dynptr vs bpf_dynptr_kern, __sk_buff vs sk_buff, and xdp_md vs xdp_buff. It wasn't an issue before, as the verifier allows aliased types. However, since we are now generating kfunc prototypes in vmlinux.h (in addition to keeping bpf_kfuncs.h around), this conflict creates compilation errors. Fix this conflict by using "user facing" types in kfunc definitions. This results in more casts, but otherwise has no additional runtime cost. Note, similar to 5b268d1ebcdc ("bpf: Have bpf_rdonly_cast() take a const pointer"), we also make kfuncs take const arguments where appropriate in order to make the kfunc more permissive. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/b58346a63a0e66bc9b7504da751b526b0b189a67.1718207789.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- fs/verity/measure.c | 5 +-- include/linux/bpf.h | 8 ++--- kernel/bpf/crypto.c | 24 ++++++++----- kernel/bpf/helpers.c | 39 +++++++++++++++------- kernel/bpf/verifier.c | 2 +- kernel/trace/bpf_trace.c | 15 +++++---- net/core/filter.c | 32 +++++++++++------- .../testing/selftests/bpf/progs/ip_check_defrag.c | 10 +++--- .../selftests/bpf/progs/verifier_netfilter_ctx.c | 6 ++-- 9 files changed, 88 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/fs/verity/measure.c b/fs/verity/measure.c index 3969d54158d1..175d2f1bc089 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -111,14 +111,15 @@ __bpf_kfunc_start_defs(); /** * bpf_get_fsverity_digest: read fsverity digest of file * @file: file to get digest from - * @digest_ptr: (out) dynptr for struct fsverity_digest + * @digest_p: (out) dynptr for struct fsverity_digest * * Read fsverity_digest of *file* into *digest_ptr*. * * Return: 0 on success, a negative value on error. */ -__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_kern *digest_ptr) +__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_p) { + struct bpf_dynptr_kern *digest_ptr = (struct bpf_dynptr_kern *)digest_p; const struct inode *inode = file_inode(file); u32 dynptr_sz = __bpf_dynptr_size(digest_ptr); struct fsverity_digest *arg; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f636b4998bf7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3265,8 +3265,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr); +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -3288,8 +3288,8 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } -static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr) +static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr) { return -EOPNOTSUPP; } diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c index 2bee4af91e38..3c1de0e5c0bd 100644 --- a/kernel/bpf/crypto.c +++ b/kernel/bpf/crypto.c @@ -311,11 +311,15 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, * Decrypts provided buffer using IV data and the crypto context. Crypto context must be configured. */ __bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv) { - return bpf_crypto_crypt(ctx, src, dst, siv, true); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, true); } /** @@ -328,11 +332,15 @@ __bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, * Encrypts provided buffer using IV data and the crypto context. Crypto context must be configured. */ __bpf_kfunc int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv) { - return bpf_crypto_crypt(ctx, src, dst, siv, false); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, false); } __bpf_kfunc_end_defs(); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6f1abcb4b084..3ac521c48bba 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2459,9 +2459,10 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type; u32 len = buffer__szk; int err; @@ -2543,9 +2544,11 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data || __bpf_dynptr_is_rdonly(ptr)) return NULL; @@ -2571,11 +2574,12 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o * will be copied out into the buffer and the user will need to call * bpf_dynptr_write() to commit changes. */ - return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); + return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) +__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; u32 size; if (!ptr->data || start > end) @@ -2592,36 +2596,45 @@ __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 en return 0; } -__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + return !ptr->data; } -__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return false; return __bpf_dynptr_is_rdonly(ptr); } -__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return -EINVAL; return __bpf_dynptr_size(ptr); } -__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr, - struct bpf_dynptr_kern *clone__uninit) +__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, + struct bpf_dynptr *clone__uninit) { + struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit; + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) { - bpf_dynptr_set_null(clone__uninit); + bpf_dynptr_set_null(clone); return -EINVAL; } - *clone__uninit = *ptr; + *clone = *ptr; return 0; } @@ -2986,7 +2999,9 @@ late_initcall(kfunc_init); */ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) { - return bpf_dynptr_slice(ptr, 0, NULL, len); + const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; + + return bpf_dynptr_slice(p, 0, NULL, len); } /* Get a pointer to dynptr data up to len bytes for read write access. If diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dcac6119d810..acc9dd830807 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10914,7 +10914,7 @@ enum { }; BTF_ID_LIST(kf_arg_btf_ids) -BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_dynptr) BTF_ID(struct, bpf_list_head) BTF_ID(struct, bpf_list_node) BTF_ID(struct, bpf_rb_root) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bc16e21a2a44..4b3fda456299 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1369,8 +1369,8 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION /** * bpf_verify_pkcs7_signature - verify a PKCS#7 signature - * @data_ptr: data to verify - * @sig_ptr: signature of the data + * @data_p: data to verify + * @sig_p: signature of the data * @trusted_keyring: keyring with keys trusted for signature verification * * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr* @@ -1378,10 +1378,12 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) * * Return: 0 on success, a negative value on error. */ -__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, - struct bpf_dynptr_kern *sig_ptr, +__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, struct bpf_key *trusted_keyring) { + struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p; + struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p; const void *data, *sig; u32 data_len, sig_len; int ret; @@ -1444,7 +1446,7 @@ __bpf_kfunc_start_defs(); * bpf_get_file_xattr - get xattr of a file * @file: file to get xattr from * @name__str: name of the xattr - * @value_ptr: output buffer of the xattr value + * @value_p: output buffer of the xattr value * * Get xattr *name__str* of *file* and store the output in *value_ptr*. * @@ -1453,8 +1455,9 @@ __bpf_kfunc_start_defs(); * Return: 0 on success, a negative value on error. */ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, - struct bpf_dynptr_kern *value_ptr) + struct bpf_dynptr *value_p) { + struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p; struct dentry *dentry; u32 value_len; void *value; diff --git a/net/core/filter.c b/net/core/filter.c index 7c46ecba3b01..73722790cee3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11859,28 +11859,34 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } __bpf_kfunc_start_defs(); -__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct sk_buff *skb = (struct sk_buff *)s; + if (flags) { - bpf_dynptr_set_null(ptr__uninit); + bpf_dynptr_set_null(ptr); return -EINVAL; } - bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); + bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); return 0; } -__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct xdp_buff *xdp = (struct xdp_buff *)x; + if (flags) { - bpf_dynptr_set_null(ptr__uninit); + bpf_dynptr_set_null(ptr); return -EINVAL; } - bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); + bpf_dynptr_init(ptr, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); return 0; } @@ -11906,10 +11912,11 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } -__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, +__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk, struct bpf_tcp_req_attrs *attrs, int attrs__sz) { #if IS_ENABLED(CONFIG_SYN_COOKIES) + struct sk_buff *skb = (struct sk_buff *)s; const struct request_sock_ops *ops; struct inet_request_sock *ireq; struct tcp_request_sock *treq; @@ -12004,16 +12011,17 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, __bpf_kfunc_end_defs(); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; int err; err = bpf_dynptr_from_skb(skb, flags, ptr__uninit); if (err) return err; - bpf_dynptr_set_rdonly(ptr__uninit); + bpf_dynptr_set_rdonly(ptr); return 0; } diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 1c2b6c1616b0..645b2c9f7867 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,7 +12,7 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -42,7 +42,7 @@ static bool is_frag_v6(struct ipv6hdr *ip6h) return ip6h->nexthdr == NEXTHDR_FRAGMENT; } -static int handle_v4(struct sk_buff *skb) +static int handle_v4(struct __sk_buff *skb) { struct bpf_dynptr ptr; u8 iph_buf[20] = {}; @@ -64,7 +64,7 @@ static int handle_v4(struct sk_buff *skb) return NF_ACCEPT; } -static int handle_v6(struct sk_buff *skb) +static int handle_v6(struct __sk_buff *skb) { struct bpf_dynptr ptr; struct ipv6hdr *ip6h; @@ -89,9 +89,9 @@ static int handle_v6(struct sk_buff *skb) SEC("netfilter") int defrag(struct bpf_nf_ctx *ctx) { - struct sk_buff *skb = ctx->skb; + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; - switch (bpf_ntohs(skb->protocol)) { + switch (bpf_ntohs(ctx->skb->protocol)) { case ETH_P_IP: return handle_v4(skb); case ETH_P_IPV6: diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index 65bba330e7e5..ab9f9f2620ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,7 +79,7 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -90,8 +90,8 @@ __success __failure_unpriv __retval(0) int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) { + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; const struct nf_hook_state *state = ctx->state; - struct sk_buff *skb = ctx->skb; const struct iphdr *iph; const struct tcphdr *th; u8 buffer_iph[20] = {}; @@ -99,7 +99,7 @@ int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) struct bpf_dynptr ptr; uint8_t ihl; - if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) + if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) return NF_ACCEPT; iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph)); -- cgit v1.2.3 From 7fc45cb68696c7213c484ec81892bc8a986fde52 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Mon, 10 Jun 2024 03:28:39 -0700 Subject: net: mana: Allow variable size indirection table Allow variable size indirection table allocation in MANA instead of using a constant value MANA_INDIRECT_TABLE_SIZE. The size is now derived from the MANA_QUERY_VPORT_CONFIG and the indirection table is allocated dynamically. Signed-off-by: Shradha Gupta Link: https://lore.kernel.org/r/1718015319-9609-1-git-send-email-shradhagupta@linux.microsoft.com Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/qp.c | 10 +-- drivers/net/ethernet/microsoft/mana/mana_en.c | 85 ++++++++++++++++++---- drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 27 +++++-- include/net/mana/gdma.h | 4 +- include/net/mana/mana.h | 9 ++- 5 files changed, 104 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index ba13c5abf8ef..2d411a16a127 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, gc = mdev_to_gc(dev); - req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_SIZE); + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -41,18 +41,18 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, if (log_ind_tbl_size) req->rss_enable = true; - req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; + req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE; req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, indir_tab); req->update_indir_tab = true; req->cqe_coalescing_enable = 1; /* The ind table passed to the hardware must have - * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb + * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb * ind_table to MANA_INDIRECT_TABLE_SIZE if required */ ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size); - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) { req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)]; ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i, req->indir_tab[i]); @@ -137,7 +137,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, } ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size; - if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) { + if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) { ibdev_dbg(&mdev->ib_dev, "Indirect table size %d exceeding limit\n", ind_tbl_size); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d087cf954f75..b89ad4afd66e 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -481,7 +481,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, struct sock *sk = skb->sk; int txq; - txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + txq = apc->indir_table[hash & (apc->indir_table_sz - 1)]; if (txq != old_q && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -721,6 +721,13 @@ static void mana_cleanup_port_context(struct mana_port_context *apc) apc->rxqs = NULL; } +static void mana_cleanup_indir_table(struct mana_port_context *apc) +{ + apc->indir_table_sz = 0; + kfree(apc->indir_table); + kfree(apc->rxobj_table); +} + static int mana_init_port_context(struct mana_port_context *apc) { apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), @@ -962,7 +969,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, *max_sq = resp.max_num_sq; *max_rq = resp.max_num_rq; - *num_indir_entry = resp.num_indirection_ent; + if (resp.num_indirection_ent > 0 && + resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE && + is_power_of_2(resp.num_indirection_ent)) { + *num_indir_entry = resp.num_indirection_ent; + } else { + netdev_warn(apc->ndev, + "Setting indirection table size to default %d for vPort %d\n", + MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx); + *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE; + } apc->port_handle = resp.vport; ether_addr_copy(apc->mac_addr, resp.mac_addr); @@ -1054,14 +1070,13 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, bool update_default_rxobj, bool update_key, bool update_tab) { - u16 num_entries = MANA_INDIRECT_TABLE_SIZE; struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; u32 req_buf_size; int err; - req_buf_size = struct_size(req, indir_tab, num_entries); + req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -1072,7 +1087,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->hdr.req.msg_version = GDMA_MESSAGE_V2; req->vport = apc->port_handle; - req->num_indir_entries = num_entries; + req->num_indir_entries = apc->indir_table_sz; req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, indir_tab); req->rx_enable = rx; @@ -1111,7 +1126,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, } netdev_info(ndev, "Configured steering vPort %llu entries %u\n", - apc->port_handle, num_entries); + apc->port_handle, apc->indir_table_sz); out: kfree(req); return err; @@ -2344,11 +2359,33 @@ static int mana_create_vport(struct mana_port_context *apc, return mana_create_txq(apc, net); } +static int mana_rss_table_alloc(struct mana_port_context *apc) +{ + if (!apc->indir_table_sz) { + netdev_err(apc->ndev, + "Indirection table size not set for vPort %d\n", + apc->port_idx); + return -EINVAL; + } + + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!apc->indir_table) + return -ENOMEM; + + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); + if (!apc->rxobj_table) { + kfree(apc->indir_table); + return -ENOMEM; + } + + return 0; +} + static void mana_rss_table_init(struct mana_port_context *apc) { int i; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = ethtool_rxfh_indir_default(i, apc->num_queues); } @@ -2361,7 +2398,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, int i; if (update_tab) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { queue_idx = apc->indir_table[i]; apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; } @@ -2466,7 +2503,6 @@ static int mana_init_port(struct net_device *ndev) struct mana_port_context *apc = netdev_priv(ndev); u32 max_txq, max_rxq, max_queues; int port_idx = apc->port_idx; - u32 num_indirect_entries; int err; err = mana_init_port_context(apc); @@ -2474,7 +2510,7 @@ static int mana_init_port(struct net_device *ndev) return err; err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, - &num_indirect_entries); + &apc->indir_table_sz); if (err) { netdev_err(ndev, "Failed to query info for vPort %d\n", port_idx); @@ -2723,6 +2759,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, if (err) goto free_net; + err = mana_rss_table_alloc(apc); + if (err) + goto reset_apc; + netdev_lockdep_set_classes(ndev); ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; @@ -2739,11 +2779,13 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, err = register_netdev(ndev); if (err) { netdev_err(ndev, "Unable to register netdev.\n"); - goto reset_apc; + goto free_indir; } return 0; +free_indir: + mana_cleanup_indir_table(apc); reset_apc: kfree(apc->rxqs); apc->rxqs = NULL; @@ -2872,16 +2914,30 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { for (i = 0; i < ac->num_ports; i++) { err = mana_probe_port(ac, i, &ac->ports[i]); - if (err) + /* we log the port for which the probe failed and stop + * probes for subsequent ports. + * Note that we keep running ports, for which the probes + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Probe Failed for port %d\n", i); break; + } } } else { for (i = 0; i < ac->num_ports; i++) { rtnl_lock(); err = mana_attach(ac->ports[i]); rtnl_unlock(); - if (err) + /* we log the port for which the attach failed and stop + * attach for subsequent ports + * Note that we keep running ports, for which the attach + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Attach Failed for port %d\n", i); break; + } } } @@ -2897,6 +2953,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; + struct mana_port_context *apc; struct device *dev = gc->dev; struct net_device *ndev; int err; @@ -2908,6 +2965,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) for (i = 0; i < ac->num_ports; i++) { ndev = ac->ports[i]; + apc = netdev_priv(ndev); if (!ndev) { if (i == 0) dev_err(dev, "No net device to remove\n"); @@ -2931,6 +2989,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) } unregister_netdevice(ndev); + mana_cleanup_indir_table(apc); rtnl_unlock(); diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index ab2413d71f6c..146d5db1792f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -245,7 +245,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev) static u32 mana_rss_indir_size(struct net_device *ndev) { - return MANA_INDIRECT_TABLE_SIZE; + struct mana_port_context *apc = netdev_priv(ndev); + + return apc->indir_table_sz; } static int mana_get_rxfh(struct net_device *ndev, @@ -257,7 +259,7 @@ static int mana_get_rxfh(struct net_device *ndev, rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) rxfh->indir[i] = apc->indir_table[i]; } @@ -273,8 +275,8 @@ static int mana_set_rxfh(struct net_device *ndev, { struct mana_port_context *apc = netdev_priv(ndev); bool update_hash = false, update_table = false; - u32 save_table[MANA_INDIRECT_TABLE_SIZE]; u8 save_key[MANA_HASH_KEY_SIZE]; + u32 *save_table; int i, err; if (!apc->port_is_up) @@ -284,13 +286,19 @@ static int mana_set_rxfh(struct net_device *ndev, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; + save_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!save_table) + return -ENOMEM; + if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) - if (rxfh->indir[i] >= apc->num_queues) - return -EINVAL; + for (i = 0; i < apc->indir_table_sz; i++) + if (rxfh->indir[i] >= apc->num_queues) { + err = -EINVAL; + goto cleanup; + } update_table = true; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { save_table[i] = apc->indir_table[i]; apc->indir_table[i] = rxfh->indir[i]; } @@ -306,7 +314,7 @@ static int mana_set_rxfh(struct net_device *ndev, if (err) { /* recover to original values */ if (update_table) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = save_table[i]; } @@ -316,6 +324,9 @@ static int mana_set_rxfh(struct net_device *ndev, mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); } +cleanup: + kfree(save_table); + return err; } diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 27684135bb4d..c547756c4284 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -543,11 +543,13 @@ enum { */ #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) +#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ - GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) + GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT) #define GDMA_DRV_CAP_FLAGS2 0 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 561f6719fb4e..59823901b74f 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -30,8 +30,8 @@ enum TRI_STATE { }; /* Number of entries for hardware indirection table must be in power of 2 */ -#define MANA_INDIRECT_TABLE_SIZE 64 -#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) +#define MANA_INDIRECT_TABLE_MAX_SIZE 512 +#define MANA_INDIRECT_TABLE_DEF_SIZE 64 /* The Toeplitz hash key's length in bytes: should be multiple of 8 */ #define MANA_HASH_KEY_SIZE 40 @@ -410,10 +410,11 @@ struct mana_port_context { struct mana_tx_qp *tx_qp; /* Indirection Table for RX & TX. The values are queue indexes */ - u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + u32 *indir_table; + u32 indir_table_sz; /* Indirection table containing RxObject Handles */ - mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + mana_handle_t *rxobj_table; /* Hash key used by the NIC */ u8 hashkey[MANA_HASH_KEY_SIZE]; -- cgit v1.2.3 From b975d3ee5962237c1e2f5d5aeeaaf0dc2173486c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:39 +0200 Subject: net: add and use skb_get_hash_net Years ago flow dissector gained ability to delegate flow dissection to a bpf program, scoped per netns. Unfortunately, skb_get_hash() only gets an sk_buff argument instead of both net+skb. This means the flow dissector needs to obtain the netns pointer from somewhere else. The netns is derived from skb->dev, and if that is not available, from skb->sk. If neither is set, we hit a (benign) WARN_ON_ONCE(). Trying both dev and sk covers most cases, but not all, as recently reported by Christoph Paasch. In case of nf-generated tcp reset, both sk and dev are NULL: WARNING: .. net/core/flow_dissector.c:1104 skb_flow_dissect_flow_keys include/linux/skbuff.h:1536 [inline] skb_get_hash include/linux/skbuff.h:1578 [inline] nft_trace_init+0x7d/0x120 net/netfilter/nf_tables_trace.c:320 nft_do_chain+0xb26/0xb90 net/netfilter/nf_tables_core.c:268 nft_do_chain_ipv4+0x7a/0xa0 net/netfilter/nft_chain_filter.c:23 nf_hook_slow+0x57/0x160 net/netfilter/core.c:626 __ip_local_out+0x21d/0x260 net/ipv4/ip_output.c:118 ip_local_out+0x26/0x1e0 net/ipv4/ip_output.c:127 nf_send_reset+0x58c/0x700 net/ipv4/netfilter/nf_reject_ipv4.c:308 nft_reject_ipv4_eval+0x53/0x90 net/ipv4/netfilter/nft_reject_ipv4.c:30 [..] syzkaller did something like this: table inet filter { chain input { type filter hook input priority filter; policy accept; meta nftrace set 1 tcp dport 42 reject with tcp reset } chain output { type filter hook output priority filter; policy accept; # empty chain is enough } } ... then sends a tcp packet to port 42. Initial attempt to simply set skb->dev from nf_reject_ipv4 doesn't cover all cases: skbs generated via ipv4 igmp_send_report trigger similar splat. Moreover, Pablo Neira found that nft_hash.c uses __skb_get_hash_symmetric() which would trigger same warn splat for such skbs. Lets allow callers to pass the current netns explicitly. The nf_trace infrastructure is adjusted to use the new helper. __skb_get_hash_symmetric is handled in the next patch. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/494 Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-2-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++-- net/core/flow_dissector.c | 15 +++++++++++---- net/netfilter/nf_tables_trace.c | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe7d8dbef77e..6e78019f899a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,7 +1498,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } -void __skb_get_hash(struct sk_buff *skb); +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, @@ -1578,10 +1578,18 @@ void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); +static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_net(net, skb); + + return skb->hash; +} + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) - __skb_get_hash(skb); + __skb_get_hash_net(NULL, skb); return skb->hash; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 59fe46077b3c..702b4f0a70b6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1860,7 +1860,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); /** - * __skb_get_hash: calculate a flow hash + * __skb_get_hash_net: calculate a flow hash + * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to calculate flow hash from * * This function calculates a flow hash based on src/dst addresses @@ -1868,18 +1869,24 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ -void __skb_get_hash(struct sk_buff *skb) +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb) { struct flow_keys keys; u32 hash; + memset(&keys, 0, sizeof(keys)); + + __skb_flow_dissect(net, skb, &flow_keys_dissector, + &keys, NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, &hashrnd); + hash = __flow_hash_from_keys(&keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } -EXPORT_SYMBOL(__skb_get_hash); +EXPORT_SYMBOL(__skb_get_hash_net); __u32 skb_get_hash_perturb(const struct sk_buff *skb, const siphash_key_t *perturb) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index a83637e3f455..580c55268f65 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -317,7 +317,7 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, net_get_random_once(&trace_key, sizeof(trace_key)); info->skbid = (u32)siphash_3u32(hash32_ptr(skb), - skb_get_hash(skb), + skb_get_hash_net(nft_net(pkt), skb), skb->skb_iif, &trace_key); } -- cgit v1.2.3 From d1dab4f71d372e00e2d34a9c32bf261623e3a95c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:40 +0200 Subject: net: add and use __skb_get_hash_symmetric_net Similar to previous patch: apply same logic for __skb_get_hash_symmetric and let callers pass the netns to the dissector core. Existing function is turned into a wrapper to avoid adjusting all callers, nft_hash.c uses new function. Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-3-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 8 +++++++- net/core/flow_dissector.c | 6 +++--- net/netfilter/nft_hash.c | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6e78019f899a..813406a9bd6c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,8 +1498,14 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb); + +static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +{ + return __skb_get_hash_symmetric_net(NULL, skb); +} + void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); -u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 702b4f0a70b6..e479790db0f7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1845,19 +1845,19 @@ EXPORT_SYMBOL(make_flow_keys_digest); static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; -u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb) { struct flow_keys keys; __flow_hash_secret_init(); memset(&keys, 0, sizeof(keys)); - __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, + __skb_flow_dissect(net, skb, &flow_keys_dissector_symmetric, &keys, NULL, 0, 0, 0, 0); return __flow_hash_from_keys(&keys, &hashrnd); } -EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric_net); /** * __skb_get_hash_net: calculate a flow hash diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 92d47e469204..868d68302d22 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -51,7 +51,8 @@ static void nft_symhash_eval(const struct nft_expr *expr, struct sk_buff *skb = pkt->skb; u32 h; - h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); + h = reciprocal_scale(__skb_get_hash_symmetric_net(nft_net(pkt), skb), + priv->modulus); regs->data[priv->dreg] = h + priv->offset; } -- cgit v1.2.3 From 3e453ca122d483eb519f934b6624215f0536301c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:53 +0200 Subject: net: ipv4,ipv6: Pass multipath hash computation through a helper The following patches will add a sysctl to control multipath hash seed. In order to centralize the hash computation, add a helper, fib_multipath_hash_from_keys(), and have all IPv4 and IPv6 route.c invocations of flow_hash_from_keys() go through this helper instead. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-2-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 7 +++++++ net/ipv4/route.c | 12 ++++++------ net/ipv6/route.c | 12 ++++++------ 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9b2f69ba5e49..b8b3c07e8f7b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -521,6 +521,13 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif + +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + return flow_hash_from_keys(keys); +} + int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb0bdf34ed50..54512acbead7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1923,7 +1923,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net, hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_inner(const struct net *net, @@ -1972,7 +1972,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_skb(const struct net *net, @@ -2009,7 +2009,7 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } /* if skb is set it will be used and fl4 can be NULL */ @@ -2030,7 +2030,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 1: /* skb is currently provided only when forwarding */ @@ -2064,7 +2064,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2095,7 +2095,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 3: if (skb) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad5fff5a210c..1916de615398 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2372,7 +2372,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net, hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 rt6_multipath_custom_hash_inner(const struct net *net, @@ -2421,7 +2421,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 rt6_multipath_custom_hash_skb(const struct net *net, @@ -2460,7 +2460,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl6->fl6_dport; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } /* if skb is set it will be used and fl6 can be NULL */ @@ -2482,7 +2482,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 1: if (skb) { @@ -2514,7 +2514,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2551,7 +2551,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 3: if (skb) -- cgit v1.2.3 From 4ee2a8cace3fb9a34aea6a56426f89d26dd514f3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:54 +0200 Subject: net: ipv4: Add a sysctl to set multipath hash seed When calculating hashes for the purpose of multipath forwarding, both IPv4 and IPv6 code currently fall back on flow_hash_from_keys(). That uses a randomly-generated seed. That's a fine choice by default, but unfortunately some deployments may need a tighter control over the seed used. In this patch, make the seed configurable by adding a new sysctl key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used specifically for multipath forwarding and not for the other concerns that flow_hash_from_keys() is used for, such as queue selection. Expose the knob as sysctl because other such settings, such as headers to hash, are also handled that way. Like those, the multipath hash seed is a per-netns variable. Despite being placed in the net.ipv4 namespace, the multipath seed sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of TCP variables. The seed used by flow_hash_from_keys() is a 128-bit quantity. However it seems that usually the seed is a much more modest value. 32 bits seem typical (Cisco, Cumulus), some systems go even lower. For that reason, and to decouple the user interface from implementation details, go with a 32-bit quantity, which is then quadruplicated to form the siphash key. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-3-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 14 ++++++++ include/net/flow_dissector.h | 2 ++ include/net/ip_fib.h | 23 +++++++++++- include/net/netns/ipv4.h | 8 +++++ net/core/flow_dissector.c | 7 ++++ net/ipv4/sysctl_net_ipv4.c | 66 ++++++++++++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 6e99eccdb837..3616389c8c2d 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -131,6 +131,20 @@ fib_multipath_hash_fields - UNSIGNED INTEGER Default: 0x0007 (source IP, destination IP and IP protocol) +fib_multipath_hash_seed - UNSIGNED INTEGER + The seed value used when calculating hash for multipath routes. Applies + to both IPv4 and IPv6 datapath. Only present for kernels built with + CONFIG_IP_ROUTE_MULTIPATH enabled. + + When set to 0, the seed value used for multipath routing defaults to an + internal random-generated one. + + The actual hashing algorithm is not specified -- there is no guarantee + that a next hop distribution effected by a given seed will keep stable + across kernel versions. + + Default: 0 (random) + fib_sync_mem - UNSIGNED INTEGER Amount of dirty memory from fib entries that can be backlogged before synchronize_rcu is forced. diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 99626475c3f4..3e47e123934d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b8b3c07e8f7b..6e7984bfb986 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -520,13 +520,34 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); -#endif + +static void +fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed) +{ + u64 mp_seed_64 = mp_seed; + + key->key[0] = (mp_seed_64 << 32) | mp_seed_64; + key->key[1] = key->key[0]; +} +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + siphash_aligned_key_t hash_key; + u32 mp_seed; + + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + fib_multipath_hash_construct_key(&hash_key, mp_seed); + + return flow_hash_from_keys_seed(keys, &hash_key); +} +#else static inline u32 fib_multipath_hash_from_keys(const struct net *net, struct flow_keys *keys) { return flow_hash_from_keys(keys); } +#endif int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a91bb971f901..5fcd61ada622 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,13 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; +#ifdef CONFIG_IP_ROUTE_MULTIPATH +struct sysctl_fib_multipath_hash_seed { + u32 user_seed; + u32 mp_seed; +}; +#endif + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. @@ -246,6 +253,7 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH + struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; u32 sysctl_fib_multipath_hash_fields; u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_hash_policy; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e479790db0f7..e64a26379807 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1806,6 +1806,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys) } EXPORT_SYMBOL(flow_hash_from_keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval) +{ + return __flow_hash_from_keys(keys, keyval); +} +EXPORT_SYMBOL(flow_hash_from_keys_seed); + static inline u32 ___skb_get_hash(const struct sk_buff *skb, struct flow_keys *keys, const siphash_key_t *keyval) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index bb64c0ef092d..9140d20eb2d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write, return ret; } + +static u32 proc_fib_multipath_hash_rand_seed __ro_after_init; + +static void proc_fib_multipath_hash_init_rand_seed(void) +{ + get_random_bytes(&proc_fib_multipath_hash_rand_seed, + sizeof(proc_fib_multipath_hash_rand_seed)); +} + +static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) +{ + struct sysctl_fib_multipath_hash_seed new = { + .user_seed = user_seed, + .mp_seed = (user_seed ? user_seed : + proc_fib_multipath_hash_rand_seed), + }; + + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new); +} + +static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write, + void *buffer, size_t *lenp, + loff_t *ppos) +{ + struct sysctl_fib_multipath_hash_seed *mphs; + struct net *net = table->data; + struct ctl_table tmp; + u32 user_seed; + int ret; + + mphs = &net->ipv4.sysctl_fib_multipath_hash_seed; + user_seed = mphs->user_seed; + + tmp = *table; + tmp.data = &user_seed; + + ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) { + proc_fib_multipath_hash_set_seed(net, user_seed); + call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); + } + + return ret; +} +#else + +static void proc_fib_multipath_hash_init_rand_seed(void) +{ +} + +static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) +{ +} + #endif static struct ctl_table ipv4_table[] = { @@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &fib_multipath_hash_fields_all_mask, }, + { + .procname = "fib_multipath_hash_seed", + .data = &init_net, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_fib_multipath_hash_seed, + }, #endif { .procname = "ip_unprivileged_port_start", @@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!net->ipv4.sysctl_local_reserved_ports) goto err_ports; + proc_fib_multipath_hash_set_seed(net, 0); + return 0; err_ports: @@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void) if (!hdr) return -ENOMEM; + proc_fib_multipath_hash_init_rand_seed(); + if (register_pernet_subsys(&ipv4_sysctl_ops)) { unregister_net_sysctl_table(hdr); return -ENOMEM; -- cgit v1.2.3 From b48a1540b73a3c3a9ef59ce34176cc8180c6ce57 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sun, 9 Jun 2024 17:33:51 +0000 Subject: flow_offload: add encapsulation control flag helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two new helper functions: flow_rule_is_supp_enc_control_flags() flow_rule_has_enc_control_flags() They are intended to be used for validating encapsulation control flags, and compliment the similar helpers without "enc_" in the name. The only difference is that they have their own error message, to make it obvious if an unsupported flag error is related to FLOW_DISSECTOR_KEY_CONTROL or FLOW_DISSECTOR_KEY_ENC_CONTROL. flow_rule_has_enc_control_flags() is for drivers supporting FLOW_DISSECTOR_KEY_ENC_CONTROL, but not supporting any encapsulation control flags. (Currently all 4 drivers fits this category) flow_rule_is_supp_enc_control_flags() is currently only used for the above helper, but should also be used by drivers once they implement at least one encapsulation control flag. There is AFAICT currently no need for an "enc_" variant of flow_rule_match_has_control_flags(), as all drivers currently supporting FLOW_DISSECTOR_KEY_ENC_CONTROL, are already calling flow_rule_match_enc_control() directly. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240609173358.193178-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index ec9f80509f60..292cd8f4b762 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -471,6 +471,28 @@ static inline bool flow_rule_is_supp_control_flags(const u32 supp_flags, return false; } +/** + * flow_rule_is_supp_enc_control_flags() - check for supported control flags + * @supp_enc_flags: encapsulation control flags supported by driver + * @enc_ctrl_flags: encapsulation control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if only supported control flags are set, false otherwise. + */ +static inline bool flow_rule_is_supp_enc_control_flags(const u32 supp_enc_flags, + const u32 enc_ctrl_flags, + struct netlink_ext_ack *extack) +{ + if (likely((enc_ctrl_flags & ~supp_enc_flags) == 0)) + return true; + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported match on enc_control.flags %#x", + enc_ctrl_flags); + + return false; +} + /** * flow_rule_has_control_flags() - check for presence of any control flags * @ctrl_flags: control flags present in rule @@ -484,6 +506,19 @@ static inline bool flow_rule_has_control_flags(const u32 ctrl_flags, return !flow_rule_is_supp_control_flags(0, ctrl_flags, extack); } +/** + * flow_rule_has_enc_control_flags() - check for presence of any control flags + * @enc_ctrl_flags: encapsulation control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if control flags are set, false otherwise. + */ +static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags, + struct netlink_ext_ack *extack) +{ + return !flow_rule_is_supp_enc_control_flags(0, enc_ctrl_flags, extack); +} + /** * flow_rule_match_has_control_flags() - match and check for any control flags * @rule: The flow_rule under evaluation. -- cgit v1.2.3 From a3cfe84cca28f205761a0450016593b0d728165e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 6 Jun 2024 07:58:50 -0700 Subject: bpf: Add CHECKSUM_COMPLETE to bpf test progs Add special flag to validate that TC BPF program properly updates checksum information in skb. Signed-off-by: Vadim Fedorenko Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240606145851.229116-1-vadfed@meta.com --- include/uapi/linux/bpf.h | 2 ++ net/bpf/test_run.c | 28 +++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 36ae54f57bf5..3c965e32fc33 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -983,7 +983,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; - if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) + if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) || + kattr->test.cpu || kattr->test.batch_size) return -EINVAL; data = bpf_test_init(kattr, kattr->test.data_size_in, @@ -1031,6 +1032,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); + if (ctx && ctx->ifindex > 1) { dev = dev_get_by_index(net, ctx->ifindex); if (!dev) { @@ -1066,9 +1068,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); + ret = convert___skb_to_skb(skb, ctx); if (ret) goto out; + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + + skb->csum = skb_checksum(skb, off, len, 0); + skb->ip_summed = CHECKSUM_COMPLETE; + } + ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); if (ret) goto out; @@ -1083,6 +1095,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, } memset(__skb_push(skb, hh_len), 0, hh_len); } + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + __wsum csum; + + csum = skb_checksum(skb, off, len, 0); + + if (csum_fold(skb->csum) != csum_fold(csum)) { + ret = -EBADMSG; + goto out; + } + } + convert_skb_to___skb(skb, ctx); size = skb->len; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { -- cgit v1.2.3 From 98d7ca374ba4b39e7535613d40e159f09ca14da2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 12 Jun 2024 18:38:13 -0700 Subject: bpf: Track delta between "linked" registers. Compilers can generate the code r1 = r2 r1 += 0x1 if r2 < 1000 goto ... use knowledge of r2 range in subsequent r1 operations So remember constant delta between r2 and r1 and update r1 after 'if' condition. Unfortunately LLVM still uses this pattern for loops with 'can_loop' construct: for (i = 0; i < 1000 && can_loop; i++) The "undo" pass was introduced in LLVM https://reviews.llvm.org/D121937 to prevent this optimization, but it cannot cover all cases. Instead of fighting middle end optimizer in BPF backend teach the verifier about this pattern. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240613013815.953-3-alexei.starovoitov@gmail.com --- include/linux/bpf_verifier.h | 12 +++- kernel/bpf/log.c | 4 +- kernel/bpf/verifier.c | 95 +++++++++++++++++++++++--- tools/testing/selftests/bpf/verifier/precise.c | 22 +++--- 4 files changed, 109 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 50aa87f8d77f..2b54e25d2364 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -73,7 +73,10 @@ enum bpf_iter_state { struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; - /* Fixed part of pointer offset, pointer types only */ + /* + * Fixed part of pointer offset, pointer types only. + * Or constant delta between "linked" scalars with the same ID. + */ s32 off; union { /* valid when type == PTR_TO_PACKET */ @@ -167,6 +170,13 @@ struct bpf_reg_state { * Similarly to dynptrs, we use ID to track "belonging" of a reference * to a specific instance of bpf_iter. */ + /* + * Upper bit of ID is used to remember relationship between "linked" + * registers. Example: + * r1 = r2; both will have r1->id == r2->id == N + * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10 + */ +#define BPF_ADD_CONST (1U << 31) u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned * from a pointer-cast helper, bpf_sk_fullsock() and diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 4bd8f17a9f24..3f4ae92e549f 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -708,7 +708,9 @@ static void print_reg_state(struct bpf_verifier_env *env, verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); verbose(env, "("); if (reg->id) - verbose_a("id=%d", reg->id); + verbose_a("id=%d", reg->id & ~BPF_ADD_CONST); + if (reg->id & BPF_ADD_CONST) + verbose(env, "%+d", reg->off); if (reg->ref_obj_id) verbose_a("ref_obj_id=%d", reg->ref_obj_id); if (type_is_non_owning_ref(reg->type)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e857b08e1f2d..dcbbf5f64c5d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3991,7 +3991,7 @@ static bool idset_contains(struct bpf_idset *s, u32 id) u32 i; for (i = 0; i < s->count; ++i) - if (s->ids[i] == id) + if (s->ids[i] == (id & ~BPF_ADD_CONST)) return true; return false; @@ -4001,7 +4001,7 @@ static int idset_push(struct bpf_idset *s, u32 id) { if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) return -EFAULT; - s->ids[s->count++] = id; + s->ids[s->count++] = id & ~BPF_ADD_CONST; return 0; } @@ -4438,8 +4438,20 @@ static bool __is_pointer_value(bool allow_ptr_leaks, static void assign_scalar_id_before_mov(struct bpf_verifier_env *env, struct bpf_reg_state *src_reg) { - if (src_reg->type == SCALAR_VALUE && !src_reg->id && - !tnum_is_const(src_reg->var_off)) + if (src_reg->type != SCALAR_VALUE) + return; + + if (src_reg->id & BPF_ADD_CONST) { + /* + * The verifier is processing rX = rY insn and + * rY->id has special linked register already. + * Cleared it, since multiple rX += const are not supported. + */ + src_reg->id = 0; + src_reg->off = 0; + } + + if (!src_reg->id && !tnum_is_const(src_reg->var_off)) /* Ensure that src_reg has a valid ID that will be copied to * dst_reg and then will be used by find_equal_scalars() to * propagate min/max range. @@ -14042,6 +14054,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; + bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); u8 opcode = BPF_OP(insn->code); int err; @@ -14064,11 +14077,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, if (dst_reg->type != SCALAR_VALUE) ptr_reg = dst_reg; - else - /* Make sure ID is cleared otherwise dst_reg min/max could be - * incorrectly propagated into other registers by find_equal_scalars() - */ - dst_reg->id = 0; + if (BPF_SRC(insn->code) == BPF_X) { src_reg = ®s[insn->src_reg]; if (src_reg->type != SCALAR_VALUE) { @@ -14132,7 +14141,43 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } - return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + if (err) + return err; + /* + * Compilers can generate the code + * r1 = r2 + * r1 += 0x1 + * if r2 < 1000 goto ... + * use r1 in memory access + * So remember constant delta between r2 and r1 and update r1 after + * 'if' condition. + */ + if (env->bpf_capable && BPF_OP(insn->code) == BPF_ADD && + dst_reg->id && is_reg_const(src_reg, alu32)) { + u64 val = reg_const_value(src_reg, alu32); + + if ((dst_reg->id & BPF_ADD_CONST) || + /* prevent overflow in find_equal_scalars() later */ + val > (u32)S32_MAX) { + /* + * If the register already went through rX += val + * we cannot accumulate another val into rx->off. + */ + dst_reg->off = 0; + dst_reg->id = 0; + } else { + dst_reg->id |= BPF_ADD_CONST; + dst_reg->off = val; + } + } else { + /* + * Make sure ID is cleared otherwise dst_reg min/max could be + * incorrectly propagated into other registers by find_equal_scalars() + */ + dst_reg->id = 0; + } + return 0; } /* check validity of 32-bit and 64-bit arithmetic operations */ @@ -15104,12 +15149,36 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg) { + struct bpf_reg_state fake_reg; struct bpf_func_state *state; struct bpf_reg_state *reg; bpf_for_each_reg_in_vstate(vstate, state, reg, ({ - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + if (reg->type != SCALAR_VALUE || reg == known_reg) + continue; + if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST)) + continue; + if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) || + reg->off == known_reg->off) { copy_register_state(reg, known_reg); + } else { + s32 saved_off = reg->off; + + fake_reg.type = SCALAR_VALUE; + __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off); + + /* reg = known_reg; reg += delta */ + copy_register_state(reg, known_reg); + /* + * Must preserve off, id and add_const flag, + * otherwise another find_equal_scalars() will be incorrect. + */ + reg->off = saved_off; + + scalar32_min_max_add(reg, &fake_reg); + scalar_min_max_add(reg, &fake_reg); + reg->var_off = tnum_add(reg->var_off, fake_reg.var_off); + } })); } @@ -16738,6 +16807,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } if (!rold->precise && exact == NOT_EXACT) return true; + if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) + return false; + if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off)) + return false; /* Why check_ids() for scalar registers? * * Consider the following BPF code: diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0a9293a57211..90643ccc221d 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -39,12 +39,12 @@ .result = VERBOSE_ACCEPT, .errstr = "mark_precise: frame0: last_idx 26 first_idx 20\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: regs=r2,r9 stack= before 20\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 19 first_idx 10\ mark_precise: frame0: regs=r2,r9 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ @@ -100,11 +100,11 @@ .errstr = "26: (85) call bpf_probe_read_kernel#113\ mark_precise: frame0: last_idx 26 first_idx 22\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 20 first_idx 20\ mark_precise: frame0: regs=r2,r9 stack= before 20\ mark_precise: frame0: parent state regs=r2,r9 stack=:\ -- cgit v1.2.3 From e575d3a6dd22123888defb622b1742aa2d45b942 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 14 Jun 2024 00:00:31 +0300 Subject: net/mlx5: Correct TASR typo into TSAR TSAR is the correct spelling (Transmit Scheduling ARbiter). Signed-off-by: Cosmin Ratiu Reviewed-by: Gal Pressman Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240613210036.1125203-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index d2ebe56c3977..20146a2dc7f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -531,7 +531,7 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) switch (type) { case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_TASR; + ELEMENT_TYPE_CAP_MASK_TSAR; case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: return MLX5_CAP_QOS(dev, esw_element_type) & ELEMENT_TYPE_CAP_MASK_VPORT; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 17acd0f3ca8e..466dcda40bb5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3914,7 +3914,7 @@ enum { }; enum { - ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0, + ELEMENT_TYPE_CAP_MASK_TSAR = 1 << 0, ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, -- cgit v1.2.3 From 296eaab825060d0f25e89b2f85ab9fc26128cea8 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 14 Jun 2024 00:00:36 +0300 Subject: net/mlx5e: Support SWP-mode offload L4 csum calculation Calculate the pseudo-header checksum for both IPSec transport mode and IPSec tunnel mode for mlx5 devices that do not implement a pure hardware checksum offload for L4 checksum calculation. Introduce a capability bit that identifies such mlx5 devices. Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240613210036.1125203-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 37 ++++++++++++++++++++++ .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 6 +++- include/linux/mlx5/mlx5_ifc.h | 3 +- 3 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index d1f0f868d494..5ec468268d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -6,6 +6,8 @@ #include "en.h" #include +#include +#include #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) @@ -479,6 +481,41 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } +static inline void +mlx5e_swp_encap_csum_partial(struct mlx5_core_dev *mdev, struct sk_buff *skb, bool tunnel) +{ + const struct iphdr *ip = tunnel ? inner_ip_hdr(skb) : ip_hdr(skb); + const struct ipv6hdr *ip6; + struct tcphdr *th; + struct udphdr *uh; + int len; + + if (!MLX5_CAP_ETH(mdev, swp_csum_l4_partial) || !skb_is_gso(skb)) + return; + + if (skb_is_gso_tcp(skb)) { + th = inner_tcp_hdr(skb); + len = skb_shinfo(skb)->gso_size + inner_tcp_hdrlen(skb); + + if (ip->version == 4) { + th->check = ~tcp_v4_check(len, ip->saddr, ip->daddr, 0); + } else { + ip6 = tunnel ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + th->check = ~tcp_v6_check(len, &ip6->saddr, &ip6->daddr, 0); + } + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + uh = (struct udphdr *)skb_inner_transport_header(skb); + len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr); + + if (ip->version == 4) { + uh->check = ~udp_v4_check(len, ip->saddr, ip->daddr, 0); + } else { + ip6 = tunnel ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + uh->check = ~udp_v6_check(len, &ip6->saddr, &ip6->daddr, 0); + } + } +} + #define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 359050f0b54d..3cc640669247 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -116,6 +116,7 @@ static inline bool mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { + struct mlx5_core_dev *mdev = sq->mdev; u8 inner_ipproto; if (!mlx5e_ipsec_eseg_meta(eseg)) @@ -125,9 +126,12 @@ mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, inner_ipproto = xfrm_offload(skb)->inner_ipproto; if (inner_ipproto) { eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; - if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) { + mlx5e_swp_encap_csum_partial(mdev, skb, true); eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + } } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + mlx5e_swp_encap_csum_partial(mdev, skb, false); eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial_inner++; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 466dcda40bb5..66b921c81c0f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1093,7 +1093,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_stateless_ip_over_ip_tx[0x1]; u8 reserved_at_2e[0x2]; u8 max_vxlan_udp_ports[0x8]; - u8 reserved_at_38[0x6]; + u8 swp_csum_l4_partial[0x1]; + u8 reserved_at_39[0x5]; u8 max_geneve_opt_len[0x1]; u8 tunnel_stateless_geneve_rx[0x1]; -- cgit v1.2.3 From 6c3282a6b296385bee2c383442c39f507b0d51dd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 13 Jun 2024 11:36:06 +0100 Subject: net: stmmac: add select_pcs() platform method Allow platform drivers to provide their logic to select an appropriate PCS. Tested-by: Romain Gantois Reviewed-by: Romain Gantois Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1sHhoM-00Fesu-8E@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ include/linux/stmmac.h | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bbedf2a8c60f..302aa4080de3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -949,6 +949,13 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + struct phylink_pcs *pcs; + + if (priv->plat->select_pcs) { + pcs = priv->plat->select_pcs(priv, interface); + if (!IS_ERR(pcs)) + return pcs; + } if (priv->hw->xpcs) return &priv->hw->xpcs->pcs; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 8f0f156d50d3..9c54f82901a1 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -13,7 +13,7 @@ #define __STMMAC_PLATFORM_DATA #include -#include +#include #define MTL_MAX_RX_QUEUES 8 #define MTL_MAX_TX_QUEUES 8 @@ -271,6 +271,8 @@ struct plat_stmmacenet_data { void (*dump_debug_regs)(void *priv); int (*pcs_init)(struct stmmac_priv *priv); void (*pcs_exit)(struct stmmac_priv *priv); + struct phylink_pcs *(*select_pcs)(struct stmmac_priv *priv, + phy_interface_t interface); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; -- cgit v1.2.3 From d98995b4bf981519dde4af0a081c393d62474039 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 3 Jun 2024 13:26:37 +0300 Subject: net/mlx5: Reimplement write combining test The test of write combining was added before in mlx5_ib driver. It opens UD QP and posts NOP WQEs, and uses BlueFlame doorbell. When BlueFlame is used, WQEs get written directly to a PCI BAR of the device (in addition to memory) so that the device handles them without having to access memory. In this test, the WQEs written in memory are different from the ones written to the BlueFlame which request CQE update. By checking the completion reports posted on CQ, we can know if BlueFlame succeeds or not. The write combining must be supported if BlueFlame succeeds as its register is written using write combining. This patch reimplements the test in the same way, but using a pair of SQ and CQ only. It is moved to mlx5_core as a general feature used by both mlx5_core and mlx5_ib. Besides, save write combine test result of the PCI function, so that its thousands of child functions such as SF can query without paying the time and resource penalty by itself. The test function is called only after failing to get the cached result. With this enhancement, all thousands of SFs of the PF attached to same driver no longer need to perform WC check explicitly, which is already done in the system. This saves several commands per SF, thereby speeds up SF creation and also saves completion EQ creation. Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/4ff5a8cc4c5b5b0d98397baa45a5019bcdbf096e.1717409369.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 19 +- drivers/infiniband/hw/mlx5/mem.c | 198 ----------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/infiniband/hw/mlx5/qp.c | 16 - drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 + drivers/net/ethernet/mellanox/mlx5/core/wc.c | 434 +++++++++++++++++++++++ include/linux/mlx5/driver.h | 11 + 8 files changed, 451 insertions(+), 234 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/wc.c (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2366c46eebc8..00e292422801 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1810,7 +1810,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, } resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (dev->wc_support) + if (mlx5_wc_support_get(dev->mdev)) resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp->cache_line_size = cache_line_size(); @@ -2337,7 +2337,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm switch (command) { case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_ALLOC_WC: - if (!dev->wc_support) + if (!mlx5_wc_support_get(dev->mdev)) return -EPERM; fallthrough; case MLX5_IB_MMAP_NC_PAGE: @@ -3612,7 +3612,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)( alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC) return -EOPNOTSUPP; - if (!to_mdev(c->ibucontext.device)->wc_support && + if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) && alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) return -EOPNOTSUPP; @@ -3766,18 +3766,6 @@ err_mp: return err; } -static int mlx5_ib_enable_driver(struct ib_device *dev) -{ - struct mlx5_ib_dev *mdev = to_mdev(dev); - int ret; - - ret = mlx5_ib_test_wc(mdev); - mlx5_ib_dbg(mdev, "Write-Combining %s", - mdev->wc_support ? "supported" : "not supported"); - - return ret; -} - static const struct ib_device_ops mlx5_ib_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_MLX5, @@ -3808,7 +3796,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .device_group = &mlx5_attr_group, - .enable_driver = mlx5_ib_enable_driver, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 5a22be14d958..af321f6ef7f5 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -30,10 +30,8 @@ * SOFTWARE. */ -#include #include #include "mlx5_ib.h" -#include /* * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be @@ -95,199 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff( return 0; return page_size; } - -#define WR_ID_BF 0xBF -#define WR_ID_END 0xBAD -#define TEST_WC_NUM_WQES 255 -#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) -static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, - bool signaled) -{ - struct mlx5_ib_qp *qp = to_mqp(ibqp); - struct mlx5_wqe_ctrl_seg *ctrl; - struct mlx5_bf *bf = &qp->bf; - __be32 mmio_wqe[16] = {}; - unsigned long flags; - unsigned int idx; - - if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) - return -EIO; - - spin_lock_irqsave(&qp->sq.lock, flags); - - idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); - - memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); - ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0; - ctrl->opmod_idx_opcode = - cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); - ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | - (qp->trans_qp.base.mqp.qpn << 8)); - - qp->sq.wrid[idx] = wr_id; - qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; - qp->sq.wqe_head[idx] = qp->sq.head + 1; - qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), - MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - qp->sq.head++; - - memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); - ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= - MLX5_WQE_CTRL_CQ_UPDATE; - - /* Make sure that descriptors are written before - * updating doorbell record and ringing the doorbell - */ - wmb(); - - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); - - /* Make sure doorbell record is visible to the HCA before - * we hit doorbell - */ - wmb(); - __iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe, - sizeof(mmio_wqe) / 8); - - bf->offset ^= bf->buf_size; - - spin_unlock_irqrestore(&qp->sq.lock, flags); - - return 0; -} - -static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) -{ - int ret; - struct ib_wc wc = {}; - unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; - - do { - ret = ib_poll_cq(cq, 1, &wc); - if (ret < 0 || wc.status) - return ret < 0 ? ret : -EINVAL; - if (ret) - break; - } while (!time_after(jiffies, end)); - - if (!ret) - return -ETIMEDOUT; - - if (wc.wr_id != WR_ID_BF) - ret = 0; - - return ret; -} - -static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) -{ - int err, i; - - for (i = 0; i < TEST_WC_NUM_WQES; i++) { - err = post_send_nop(dev, qp, WR_ID_BF, false); - if (err) - return err; - } - - return post_send_nop(dev, qp, WR_ID_END, true); -} - -int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) -{ - struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; - int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); - struct ib_qp_init_attr qp_init_attr = { - .cap = { .max_send_wr = TEST_WC_NUM_WQES }, - .qp_type = IB_QPT_UD, - .sq_sig_type = IB_SIGNAL_REQ_WR, - .create_flags = MLX5_IB_QP_CREATE_WC_TEST, - }; - struct ib_qp_attr qp_attr = { .port_num = 1 }; - struct ib_device *ibdev = &dev->ib_dev; - struct ib_qp *qp; - struct ib_cq *cq; - struct ib_pd *pd; - int ret; - - if (!MLX5_CAP_GEN(dev->mdev, bf)) - return 0; - - if (!dev->mdev->roce.roce_en && - port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { - if (mlx5_core_is_pf(dev->mdev)) - dev->wc_support = arch_can_pci_mmap_wc(); - return 0; - } - - ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); - if (ret) - goto print_err; - - if (!dev->wc_bfreg.wc) - goto out1; - - pd = ib_alloc_pd(ibdev, 0); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); - goto out1; - } - - cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); - goto out2; - } - - qp_init_attr.recv_cq = cq; - qp_init_attr.send_cq = cq; - qp = ib_create_qp(pd, &qp_init_attr); - if (IS_ERR(qp)) { - ret = PTR_ERR(qp); - goto out3; - } - - qp_attr.qp_state = IB_QPS_INIT; - ret = ib_modify_qp(qp, &qp_attr, - IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | - IB_QP_QKEY); - if (ret) - goto out4; - - qp_attr.qp_state = IB_QPS_RTR; - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); - if (ret) - goto out4; - - qp_attr.qp_state = IB_QPS_RTS; - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); - if (ret) - goto out4; - - ret = test_wc_do_send(dev, qp); - if (ret < 0) - goto out4; - - ret = test_wc_poll_cq_result(dev, cq); - if (ret > 0) { - dev->wc_support = true; - ret = 0; - } - -out4: - ib_destroy_qp(qp); -out3: - ib_destroy_cq(cq); -out2: - ib_dealloc_pd(pd); -out1: - mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); -print_err: - if (ret) - mlx5_ib_err( - dev, - "Error %d while trying to test write-combining support\n", - ret); - return ret; -} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f255a12e26a0..b68779e9d86c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -341,7 +341,6 @@ struct mlx5_ib_flow_db { * rely on the range reserved for that use in the ib_qp_create_flags enum. */ #define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START -#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) struct wr_list { u16 opcode; @@ -1123,7 +1122,6 @@ struct mlx5_ib_dev { u8 ib_active:1; u8 is_rep:1; u8 lag_active:1; - u8 wc_support:1; u8 fill_delay; struct umr_common umrc; /* sync used page count stats @@ -1149,7 +1147,6 @@ struct mlx5_ib_dev { /* Array with num_ports elements */ struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; - struct mlx5_sq_bfreg wc_bfreg; struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e2164f813607..e8c0fead4062 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; - else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) - qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag, return; } - if (flag == MLX5_IB_QP_CREATE_WC_TEST) { - /* - * Special case, if condition didn't meet, it won't be error, - * just different in-kernel flow. - */ - *flags &= ~MLX5_IB_QP_CREATE_WC_TEST; - return; - } mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag); } @@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, IB_QP_CREATE_PCI_WRITE_END_PADDING, MLX5_CAP_GEN(mdev, end_pad), qp); - process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST, - qp_type != MLX5_IB_QPT_REG_UMR, qp); process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); @@ -4609,10 +4597,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR) return true; - /* Internal QP used for wc testing, with NOPs in wq */ - if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) - return true; - return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 76dc5a9b9648..1289475e7be7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ - fw_reset.o qos.o lib/tout.o lib/aso.o + fw_reset.o qos.o lib/tout.o lib/aso.o wc.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 459a836a5d9c..527da58c7953 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1819,6 +1819,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&dev->intf_state_mutex); lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); mutex_init(&dev->mlx5e_res.uplink_netdev_lock); + mutex_init(&dev->wc_state_lock); mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); @@ -1916,6 +1917,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->wc_state_lock); mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); mutex_destroy(&dev->intf_state_mutex); lockdep_unregister_key(&dev->lock_key); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c new file mode 100644 index 000000000000..1bed75eca97d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include +#include "lib/clock.h" +#include "mlx5_core.h" +#include "wq.h" + +#define TEST_WC_NUM_WQES 255 +#define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES)) +#define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ +#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) + +struct mlx5_wc_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + struct mlx5_core_cq mcq; + + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +}; + +struct mlx5_wc_sq { + /* data path */ + u16 cc; + u16 pc; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 sqn; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + + struct mlx5_wc_cq cq; + struct mlx5_sq_bfreg bfreg; +}; + +static int mlx5_wc_create_cqwq(struct mlx5_core_dev *mdev, void *cqc, + struct mlx5_wc_cq *cq) +{ + struct mlx5_core_cq *mcq = &cq->mcq; + struct mlx5_wq_param param = {}; + int err; + u32 i; + + err = mlx5_cqwq_create(mdev, ¶m, cqc, &cq->wq, &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->mdev = mdev; + + return 0; +} + +static int create_wc_cq(struct mlx5_wc_cq *cq, void *cqc_data) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int err, inlen, eqn; + void *in, *cqc; + + err = mlx5_comp_eqn_get(mdev, 0, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + return err; +} + +static int mlx5_wc_create_cq(struct mlx5_core_dev *mdev, struct mlx5_wc_cq *cq) +{ + void *cqc; + int err; + + cqc = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL); + if (!cqc) + return -ENOMEM; + + MLX5_SET(cqc, cqc, log_cq_size, TEST_WC_LOG_CQ_SZ); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); + + err = mlx5_wc_create_cqwq(mdev, cqc, cq); + if (err) { + mlx5_core_err(mdev, "Failed to create wc cq wq, err=%d\n", err); + goto err_create_cqwq; + } + + err = create_wc_cq(cq, cqc); + if (err) { + mlx5_core_err(mdev, "Failed to create wc cq, err=%d\n", err); + goto err_create_cq; + } + + kvfree(cqc); + return 0; + +err_create_cq: + mlx5_wq_destroy(&cq->wq_ctrl); +err_create_cqwq: + kvfree(cqc); + return err; +} + +static void mlx5_wc_destroy_cq(struct mlx5_wc_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int create_wc_sq(struct mlx5_core_dev *mdev, void *sqc_data, + struct mlx5_wc_sq *sq) +{ + void *in, *sqc, *wq; + int inlen, err; + u8 ts_format; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc)); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + ts_format = mlx5_is_real_time_sq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + MLX5_SET(sqc, sqc, ts_format, ts_format); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, sq->bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_frag_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + if (err) { + mlx5_core_err(mdev, "Failed to create wc sq, err=%d\n", err); + goto err_create_sq; + } + + memset(in, 0, MLX5_ST_SZ_BYTES(modify_sq_in)); + MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST); + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); + + err = mlx5_core_modify_sq(mdev, sq->sqn, in); + if (err) { + mlx5_core_err(mdev, "Failed to set wc sq(sqn=0x%x) ready, err=%d\n", + sq->sqn, err); + goto err_modify_sq; + } + + kvfree(in); + return 0; + +err_modify_sq: + mlx5_core_destroy_sq(mdev, sq->sqn); +err_create_sq: + kvfree(in); + return err; +} + +static int mlx5_wc_create_sq(struct mlx5_core_dev *mdev, struct mlx5_wc_sq *sq) +{ + struct mlx5_wq_param param = {}; + void *sqc_data, *wq; + int err; + + sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL); + if (!sqc_data) + return -ENOMEM; + + wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(wq, wq, log_wq_sz, TEST_WC_SQ_LOG_WQ_SZ); + + err = mlx5_wq_cyc_create(mdev, ¶m, wq, &sq->wq, &sq->wq_ctrl); + if (err) { + mlx5_core_err(mdev, "Failed to create wc sq wq, err=%d\n", err); + goto err_create_wq_cyc; + } + + err = create_wc_sq(mdev, sqc_data, sq); + if (err) + goto err_create_sq; + + mlx5_core_dbg(mdev, "wc sq->sqn = 0x%x created\n", sq->sqn); + + kvfree(sqc_data); + return 0; + +err_create_sq: + mlx5_wq_destroy(&sq->wq_ctrl); +err_create_wq_cyc: + kvfree(sqc_data); + return err; +} + +static void mlx5_wc_destroy_sq(struct mlx5_wc_sq *sq) +{ + mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, bool signaled) +{ + int buf_size = (1 << MLX5_CAP_GEN(sq->cq.mdev, log_bf_reg_size)) / 2; + struct mlx5_wqe_ctrl_seg *ctrl; + __be32 mmio_wqe[16] = {}; + u16 pi; + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + memset(ctrl, 0, sizeof(*ctrl)); + ctrl->opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_NOP); + ctrl->qpn_ds = + cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), MLX5_SEND_WQE_DS)); + if (signaled) + ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + + memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); + ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= + MLX5_WQE_CTRL_CQ_UPDATE; + + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + sq->pc++; + sq->wq.db[MLX5_SND_DBR] = cpu_to_be32(sq->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + __iowrite64_copy(sq->bfreg.map + sq->bfreg.offset, mmio_wqe, + sizeof(mmio_wqe) / 8); + + sq->bfreg.offset ^= buf_size; +} + +static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq) +{ + struct mlx5_wc_cq *cq = &sq->cq; + struct mlx5_cqe64 *cqe; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return -ETIMEDOUT; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + mlx5_cqwq_pop(&cq->wq); + + if (get_cqe_opcode(cqe) == MLX5_CQE_REQ) { + int wqe_counter = be16_to_cpu(cqe->wqe_counter); + struct mlx5_core_dev *mdev = cq->mdev; + + if (wqe_counter == TEST_WC_NUM_WQES - 1) + mdev->wc_state = MLX5_WC_STATE_UNSUPPORTED; + else + mdev->wc_state = MLX5_WC_STATE_SUPPORTED; + + mlx5_core_dbg(mdev, "wc wqe_counter = 0x%x\n", wqe_counter); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc++; + + return 0; +} + +static void mlx5_core_test_wc(struct mlx5_core_dev *mdev) +{ + unsigned long expires; + struct mlx5_wc_sq *sq; + int i, err; + + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + return; + + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + if (!sq) + return; + + err = mlx5_alloc_bfreg(mdev, &sq->bfreg, true, false); + if (err) { + mlx5_core_err(mdev, "Failed to alloc bfreg for wc, err=%d\n", err); + goto err_alloc_bfreg; + } + + err = mlx5_wc_create_cq(mdev, &sq->cq); + if (err) + goto err_create_cq; + + err = mlx5_wc_create_sq(mdev, sq); + if (err) + goto err_create_sq; + + for (i = 0; i < TEST_WC_NUM_WQES - 1; i++) + mlx5_wc_post_nop(sq, false); + + mlx5_wc_post_nop(sq, true); + + expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; + do { + err = mlx5_wc_poll_cq(sq); + if (err) + usleep_range(2, 10); + } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED && + time_is_after_jiffies(expires)); + + mlx5_wc_destroy_sq(sq); + +err_create_sq: + mlx5_wc_destroy_cq(&sq->cq); +err_create_cq: + mlx5_free_bfreg(mdev, &sq->bfreg); +err_alloc_bfreg: + kfree(sq); +} + +bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *parent = NULL; + + if (!MLX5_CAP_GEN(mdev, bf)) { + mlx5_core_dbg(mdev, "BlueFlame not supported\n"); + goto out; + } + + if (!MLX5_CAP_GEN(mdev, log_max_sq)) { + mlx5_core_dbg(mdev, "SQ not supported\n"); + goto out; + } + + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + /* No need to lock anything as we perform WC test only + * once for whole device and was already done. + */ + goto out; + + mutex_lock(&mdev->wc_state_lock); + + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + goto unlock; + +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(mdev)) + parent = mdev->priv.parent_mdev; +#endif + + if (parent) { + mutex_lock(&parent->wc_state_lock); + + mlx5_core_test_wc(parent); + + mlx5_core_dbg(mdev, "parent set wc_state=%d\n", + parent->wc_state); + mdev->wc_state = parent->wc_state; + + mutex_unlock(&parent->wc_state_lock); + } + + mlx5_core_test_wc(mdev); + +unlock: + mutex_unlock(&mdev->wc_state_lock); +out: + mlx5_core_dbg(mdev, "wc_state=%d\n", mdev->wc_state); + + return mdev->wc_state == MLX5_WC_STATE_SUPPORTED; +} +EXPORT_SYMBOL(mlx5_wc_support_get); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 779cfdf2e9d6..0d31f77396fc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -766,6 +766,12 @@ struct mlx5_hca_cap { u32 max[MLX5_UN_SZ_DW(hca_cap_union)]; }; +enum mlx5_wc_state { + MLX5_WC_STATE_UNINITIALIZED, + MLX5_WC_STATE_UNSUPPORTED, + MLX5_WC_STATE_SUPPORTED, +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -824,6 +830,9 @@ struct mlx5_core_dev { #endif u64 num_ipsec_offloads; struct mlx5_sd *sd; + enum mlx5_wc_state wc_state; + /* sync write combining state */ + struct mutex wc_state_lock; }; struct mlx5_db { @@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) enum { MLX5_OCTWORD = 16, }; + +bool mlx5_wc_support_get(struct mlx5_core_dev *mdev); #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From b339e0a39dc37726712b9f0485d78fe4306d1667 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 13 Jun 2024 21:00:04 +0300 Subject: RDMA/mlx5: Add Qcounters req_transport_retries_exceeded/req_rnr_retries_exceeded The req_transport_retries_exceeded counter shows the number of times requester detected transport retries exceed error. The req_rnr_retries_exceeded counter show the number of times the requester detected RNR NAKs retries exceed error. Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/250466af94f4989d638fab168e246035530e912f.1718301543.git.leon@kernel.org Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 8300ce622835..4f6c1968a2ee 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -83,6 +83,8 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(resp_remote_access_errors), INIT_Q_COUNTER(resp_cqe_flush_error), INIT_Q_COUNTER(req_cqe_flush_error), + INIT_Q_COUNTER(req_transport_retries_exceeded), + INIT_Q_COUNTER(req_rnr_retries_exceeded), }; static const struct mlx5_ib_counter roce_accl_cnts[] = { @@ -102,6 +104,8 @@ static const struct mlx5_ib_counter vport_extended_err_cnts[] = { INIT_VPORT_Q_COUNTER(resp_remote_access_errors), INIT_VPORT_Q_COUNTER(resp_cqe_flush_error), INIT_VPORT_Q_COUNTER(req_cqe_flush_error), + INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded), + INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded), }; static const struct mlx5_ib_counter vport_roce_accl_cnts[] = { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..09d9d87d62c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5629,7 +5629,11 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0xa0]; + u8 reserved_at_320[0x60]; + + u8 req_rnr_retries_exceeded[0x20]; + + u8 reserved_at_3a0[0x20]; u8 resp_local_length_error[0x20]; -- cgit v1.2.3 From f22b4b55edb507a2b30981e133b66b642be4d13f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 13 Jun 2024 14:33:16 -0700 Subject: net: make for_each_netdev_dump() a little more bug-proof I find the behavior of xa_for_each_start() slightly counter-intuitive. It doesn't end the iteration by making the index point after the last element. IOW calling xa_for_each_start() again after it "finished" will run the body of the loop for the last valid element, instead of doing nothing. This works fine for netlink dumps if they terminate correctly (i.e. coalesce or carefully handle NLM_DONE), but as we keep getting reminded legacy dumps are unlikely to go away. Fixing this generically at the xa_for_each_start() level seems hard - there is no index reserved for "end of iteration". ifindexes are 31b wide, tho, and iterator is ulong so for for_each_netdev_dump() it's safe to go to the next element. Signed-off-by: Jakub Kicinski Reviewed-by: Przemek Kitszel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f148a01dd1d1..85111502cf8f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3021,7 +3021,8 @@ int call_netdevice_notifiers_info(unsigned long val, #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) #define for_each_netdev_dump(net, d, ifindex) \ - xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex)) + for (; (d = xa_find(&(net)->dev_by_index, &ifindex, \ + ULONG_MAX, XA_PRESENT)); ifindex++) static inline struct net_device *next_net_device(struct net_device *dev) { -- cgit v1.2.3 From d25a92ccae6bed02327b63d138e12e7806830f78 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 14 Jun 2024 02:00:30 +0800 Subject: net/smc: Introduce IPPROTO_SMC This patch allows to create smc socket via AF_INET, similar to the following code, /* create v4 smc sock */ v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); /* create v6 smc sock */ v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); There are several reasons why we believe it is appropriate here: 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) address. There is no AF_SMC address at all. 2. Create smc socket in the AF_INET(6) path, which allows us to reuse the infrastructure of AF_INET(6) path, such as common ebpf hooks. Otherwise, smc have to implement it again in AF_SMC path. Signed-off-by: D. Wythe Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Tested-by: Niklas Schnelle Tested-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 2 + net/smc/Makefile | 2 +- net/smc/af_smc.c | 16 ++++- net/smc/smc_inet.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_inet.h | 22 +++++++ 5 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 net/smc/smc_inet.c create mode 100644 net/smc/smc_inet.h (limited to 'include') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/net/smc/Makefile b/net/smc/Makefile index 2c510d543058..60f1c87d5212 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o -smc-y += smc_tracepoint.o +smc-y += smc_tracepoint.o smc_inet.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o smc-$(CONFIG_SMC_LO) += smc_loopback.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7ee6f37813d6..73a875573e7a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -54,6 +54,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_loopback.h" +#include "smc_inet.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3575,10 +3576,15 @@ static int __init smc_init(void) pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); goto out_lo; } - + rc = smc_inet_init(); + if (rc) { + pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); + goto out_ulp; + } static_branch_enable(&tcp_have_smc); return 0; - +out_ulp: + tcp_unregister_ulp(&smc_ulp_ops); out_lo: smc_loopback_exit(); out_ib: @@ -3615,6 +3621,7 @@ out_pernet_subsys: static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + smc_inet_exit(); tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); @@ -3642,4 +3649,9 @@ MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); MODULE_ALIAS_TCP_ULP("smc"); +/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1); +#if IS_ENABLED(CONFIG_IPV6) +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1); +#endif /* CONFIG_IPV6 */ MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c new file mode 100644 index 000000000000..bece346dd8e9 --- /dev/null +++ b/net/smc/smc_inet.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + * + * Copyright IBM Corp. 2016, 2018 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe + */ + +#include +#include + +#include "smc_inet.h" +#include "smc.h" + +static int smc_inet_init_sock(struct sock *sk); + +static struct proto smc_inet_prot = { + .name = "INET_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v4_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet_stream_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet_prot, + .ops = &smc_inet_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static struct proto smc_inet6_prot = { + .name = "INET6_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet6_stream_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet6_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet6_prot, + .ops = &smc_inet6_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; +#endif /* CONFIG_IPV6 */ + +static int smc_inet_init_sock(struct sock *sk) +{ + struct net *net = sock_net(sk); + + /* init common smc sock */ + smc_sk_init(net, sk, IPPROTO_SMC); + /* create clcsock */ + return smc_create_clcsk(net, sk, sk->sk_family); +} + +int __init smc_inet_init(void) +{ + int rc; + + rc = proto_register(&smc_inet_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet_prot fails with %d\n", + __func__, rc); + return rc; + } + /* no return value */ + inet_register_protosw(&smc_inet_protosw); + +#if IS_ENABLED(CONFIG_IPV6) + rc = proto_register(&smc_inet6_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", + __func__, rc); + goto out_inet6_prot; + } + rc = inet6_register_protosw(&smc_inet6_protosw); + if (rc) { + pr_err("%s: inet6_register_protosw smc_inet6_protosw fails with %d\n", + __func__, rc); + goto out_inet6_protosw; + } + return rc; +out_inet6_protosw: + proto_unregister(&smc_inet6_prot); +out_inet6_prot: + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +#endif /* CONFIG_IPV6 */ + return rc; +} + +void smc_inet_exit(void) +{ +#if IS_ENABLED(CONFIG_IPV6) + inet6_unregister_protosw(&smc_inet6_protosw); + proto_unregister(&smc_inet6_prot); +#endif /* CONFIG_IPV6 */ + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +} diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h new file mode 100644 index 000000000000..a489c8a2b8ef --- /dev/null +++ b/net/smc/smc_inet.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + + * Copyright IBM Corp. 2016 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe + */ +#ifndef __INET_SMC +#define __INET_SMC + +/* Initialize protocol registration on IPPROTO_SMC, + * @return 0 on success + */ +int smc_inet_init(void); + +void smc_inet_exit(void); + +#endif /* __INET_SMC */ -- cgit v1.2.3 From efb459303dd5dd6e198a0d58322dc04c3356dc23 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 12 Jun 2024 17:04:02 +0200 Subject: net: Move dev_set_hwtstamp_phylib to net/core/dev.h This declaration was added to the header to be called from ethtool. ethtool is separated from core for code organization but it is not really a separate entity, it controls very core things. As ethtool is an internal stuff it is not wise to have it in netdevice.h. Move the declaration to net/core/dev.h instead. Remove the EXPORT_SYMBOL_GPL call as ethtool can not be built as a module. Reviewed-by: Willem de Bruijn Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240612-feature_ptp_netnext-v15-2-b2a086257b63@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 --- net/core/dev.h | 4 ++++ net/core/dev_ioctl.c | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85111502cf8f..c83b390191d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3904,9 +3904,6 @@ int generic_hwtstamp_get_lower(struct net_device *dev, int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); -int dev_set_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg, - struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, diff --git a/net/core/dev.h b/net/core/dev.h index b7b518bc2be5..58f88d28bc99 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -166,4 +166,8 @@ static inline void dev_xmit_recursion_dec(void) __this_cpu_dec(softnet_data.xmit.recursion); } +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); + #endif diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 9a66cf5015f2..b9719ed3c3fd 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -363,7 +363,6 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib); static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { -- cgit v1.2.3 From 777b8afb8179155353ec14b1d8153122410aba29 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 15 Jun 2024 20:00:27 +0800 Subject: net: phy: introduce core support for phy-mode = "10g-qxgmii" 10G-QXGMII is a MAC-to-PHY interface defined by the USXGMII multiport specification. It uses the same signaling as USXGMII, but it multiplexes 4 ports over the link, resulting in a maximum speed of 2.5G per port. Some in-tree SoCs like the NXP LS1028A use "usxgmii" when they mean either the single-port USXGMII or the quad-port 10G-QXGMII variant, and they could get away just fine with that thus far. But there is a need to distinguish between the 2 as far as SerDes drivers are concerned. Signed-off-by: Vladimir Oltean Signed-off-by: Luo Jie Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- Documentation/networking/phy.rst | 6 ++++++ drivers/net/phy/phy-core.c | 1 + drivers/net/phy/phylink.c | 9 ++++++++- include/linux/phy.h | 4 ++++ include/linux/phylink.h | 1 + 5 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index 1283240d7620..f64641417c54 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -327,6 +327,12 @@ Some of the interface modes are described below: This is the Penta SGMII mode, it is similar to QSGMII but it combines 5 SGMII lines into a single link compared to 4 on QSGMII. +``PHY_INTERFACE_MODE_10G_QXGMII`` + Represents the 10G-QXGMII PHY-MAC interface as defined by the Cisco USXGMII + Multiport Copper Interface document. It supports 4 ports over a 10.3125 GHz + SerDes lane, each port having speeds of 2.5G / 1G / 100M / 10M achieved + through symbol replication. The PCS expects the standard USXGMII code word. + Pause frames / flow control =========================== diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 15f349e5995a..a235ea2264a7 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -141,6 +141,7 @@ int phy_interface_num_ports(phy_interface_t interface) return 1; case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_QUSGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: return 4; case PHY_INTERFACE_MODE_PSGMII: return 5; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 02427378acfd..6c24c48dcf0f 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -231,6 +231,7 @@ static int phylink_interface_max_speed(phy_interface_t interface) return SPEED_1000; case PHY_INTERFACE_MODE_2500BASEX: + case PHY_INTERFACE_MODE_10G_QXGMII: return SPEED_2500; case PHY_INTERFACE_MODE_5GBASER: @@ -500,7 +501,11 @@ static unsigned long phylink_get_capabilities(phy_interface_t interface, switch (interface) { case PHY_INTERFACE_MODE_USXGMII: - caps |= MAC_10000FD | MAC_5000FD | MAC_2500FD; + caps |= MAC_10000FD | MAC_5000FD; + fallthrough; + + case PHY_INTERFACE_MODE_10G_QXGMII: + caps |= MAC_2500FD; fallthrough; case PHY_INTERFACE_MODE_RGMII_TXID: @@ -926,6 +931,7 @@ static int phylink_parse_mode(struct phylink *pl, case PHY_INTERFACE_MODE_5GBASER: case PHY_INTERFACE_MODE_25GBASER: case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: case PHY_INTERFACE_MODE_10GKR: case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XLGMII: @@ -1124,6 +1130,7 @@ static unsigned int phylink_pcs_neg_mode(unsigned int mode, case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_QUSGMII: case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: /* These protocols are designed for use with a PHY which * communicates its negotiation result back to the MAC via * inband communication. Note: there exist PHYs that run diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e..205fccfc0f60 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -128,6 +128,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII * @PHY_INTERFACE_MODE_1000BASEKX: 1000Base-KX - with Clause 73 AN + * @PHY_INTERFACE_MODE_10G_QXGMII: 10G-QXGMII - 4 ports over 10G USXGMII * @PHY_INTERFACE_MODE_MAX: Book keeping * * Describes the interface between the MAC and PHY. @@ -168,6 +169,7 @@ typedef enum { PHY_INTERFACE_MODE_10GKR, PHY_INTERFACE_MODE_QUSGMII, PHY_INTERFACE_MODE_1000BASEKX, + PHY_INTERFACE_MODE_10G_QXGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -289,6 +291,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "100base-x"; case PHY_INTERFACE_MODE_QUSGMII: return "qusgmii"; + case PHY_INTERFACE_MODE_10G_QXGMII: + return "10g-qxgmii"; default: return "unknown"; } diff --git a/include/linux/phylink.h b/include/linux/phylink.h index a30a692acc32..2381e07429a2 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -654,6 +654,7 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface) case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: return 1600000; case PHY_INTERFACE_MODE_1000BASEX: -- cgit v1.2.3 From 382d1741b5b2feffef7942dd074206372afe1a96 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 17 Jun 2024 13:17:26 -0700 Subject: net: mana: Add support for page sizes other than 4KB on ARM64 As defined by the MANA Hardware spec, the queue size for DMA is 4KB minimal, and power of 2. And, the HWC queue size has to be exactly 4KB. To support page sizes other than 4KB on ARM64, define the minimal queue size as a macro separately from the PAGE_SIZE, which we always assumed it to be 4KB before supporting ARM64. Also, add MANA specific macros and update code related to size alignment, DMA region calculations, etc. Signed-off-by: Haiyang Zhang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1718655446-6576-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/Kconfig | 2 +- drivers/net/ethernet/microsoft/mana/gdma_main.c | 10 +++++----- drivers/net/ethernet/microsoft/mana/hw_channel.c | 14 +++++++------- drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++++---- drivers/net/ethernet/microsoft/mana/shm_channel.c | 13 +++++++------ include/net/mana/gdma.h | 10 +++++++++- include/net/mana/mana.h | 3 ++- 7 files changed, 35 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 286f0d5697a1..901fbffbf718 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -18,7 +18,7 @@ if NET_VENDOR_MICROSOFT config MICROSOFT_MANA tristate "Microsoft Azure Network Adapter (MANA) support" depends on PCI_MSI - depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN && ARM64_4K_PAGES) + depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN) depends on PCI_HYPERV select AUXILIARY_BUS select PAGE_POOL diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 1332db9a08eb..e1d70d21e207 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -182,7 +182,7 @@ int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, dma_addr_t dma_handle; void *buf; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; gmi->dev = gc->dev; @@ -717,7 +717,7 @@ EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, NET_MANA); static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_mem_info *gmi) { - unsigned int num_page = gmi->length / PAGE_SIZE; + unsigned int num_page = gmi->length / MANA_PAGE_SIZE; struct gdma_create_dma_region_req *req = NULL; struct gdma_create_dma_region_resp resp = {}; struct gdma_context *gc = gd->gdma_context; @@ -727,10 +727,10 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, int err; int i; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; - if (offset_in_page(gmi->virt_addr) != 0) + if (!MANA_PAGE_ALIGNED(gmi->virt_addr)) return -EINVAL; hwc = gc->hwc.driver_data; @@ -751,7 +751,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, req->page_addr_list_len = num_page; for (i = 0; i < num_page; i++) - req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + req->page_addr_list[i] = gmi->dma_handle + i * MANA_PAGE_SIZE; err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); if (err) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index bbc4f9e16c98..cafded2f9382 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -362,12 +362,12 @@ static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, int err; eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); - if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (eq_size < MANA_MIN_QSIZE) + eq_size = MANA_MIN_QSIZE; cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); - if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (cq_size < MANA_MIN_QSIZE) + cq_size = MANA_MIN_QSIZE; hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); if (!hwc_cq) @@ -429,7 +429,7 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, dma_buf->num_reqs = q_depth; - buf_size = PAGE_ALIGN(q_depth * max_msg_size); + buf_size = MANA_PAGE_ALIGN(q_depth * max_msg_size); gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); @@ -497,8 +497,8 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, else queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); - if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) - queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (queue_size < MANA_MIN_QSIZE) + queue_size = MANA_MIN_QSIZE; hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); if (!hwc_wq) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 93e526e5dd16..3f094a8a4e97 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1904,10 +1904,10 @@ static int mana_create_txq(struct mana_port_context *apc, * to prevent overflow. */ txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; - BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + BUILD_BUG_ON(!MANA_PAGE_ALIGNED(txq_size)); cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; - cq_size = PAGE_ALIGN(cq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); gc = gd->gdma_context; @@ -2204,8 +2204,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, if (err) goto out; - rq_size = PAGE_ALIGN(rq_size); - cq_size = PAGE_ALIGN(cq_size); + rq_size = MANA_PAGE_ALIGN(rq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); /* Create RQ */ memset(&spec, 0, sizeof(spec)); diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 5553af9c8085..0f1679ebad96 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -6,6 +6,7 @@ #include #include +#include #include #define PAGE_FRAME_L48_WIDTH_BYTES 6 @@ -155,8 +156,8 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, return err; } - if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || - !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + if (!MANA_PAGE_ALIGNED(eq_addr) || !MANA_PAGE_ALIGNED(cq_addr) || + !MANA_PAGE_ALIGNED(rq_addr) || !MANA_PAGE_ALIGNED(sq_addr)) return -EINVAL; if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) @@ -183,7 +184,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* EQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(eq_addr); + frame_addr = MANA_PFN(eq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -191,7 +192,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* CQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(cq_addr); + frame_addr = MANA_PFN(cq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -199,7 +200,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* RQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(rq_addr); + frame_addr = MANA_PFN(rq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -207,7 +208,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* SQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(sq_addr); + frame_addr = MANA_PFN(sq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index c547756c4284..83963d9e804d 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -224,7 +224,15 @@ struct gdma_dev { struct auxiliary_device *adev; }; -#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE +/* MANA_PAGE_SIZE is the DMA unit */ +#define MANA_PAGE_SHIFT 12 +#define MANA_PAGE_SIZE BIT(MANA_PAGE_SHIFT) +#define MANA_PAGE_ALIGN(x) ALIGN((x), MANA_PAGE_SIZE) +#define MANA_PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), MANA_PAGE_SIZE) +#define MANA_PFN(a) ((a) >> MANA_PAGE_SHIFT) + +/* Required by HW */ +#define MANA_MIN_QSIZE MANA_PAGE_SIZE #define GDMA_CQE_SIZE 64 #define GDMA_EQE_SIZE 16 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 59823901b74f..e39b8676fe54 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -42,7 +42,8 @@ enum TRI_STATE { #define MAX_SEND_BUFFERS_PER_QUEUE 256 -#define EQ_SIZE (8 * PAGE_SIZE) +#define EQ_SIZE (8 * MANA_PAGE_SIZE) + #define LOG2_EQ_THROTTLE 3 #define MAX_PORTS_IN_MANA_DEV 256 -- cgit v1.2.3 From c53795d48ee8f385c6a9e394651e7ee914baaeba Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:04 -0700 Subject: net: add rx_sk to trace_kfree_skb skb does not include enough information to find out receiving sockets/services and netns/containers on packet drops. In theory skb->dev tells about netns, but it can get cleared/reused, e.g. by TCP stack for OOO packet lookup. Similarly, skb->sk often identifies a local sender, and tells nothing about a receiver. Allow passing an extra receiving socket to the tracepoint to improve the visibility on receiving drops. Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/skb.h | 11 +++++++---- net/core/dev.c | 2 +- net/core/drop_monitor.c | 9 ++++++--- net/core/skbuff.c | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 07e0715628ec..b877133cd93a 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -24,13 +24,14 @@ DEFINE_DROP_REASON(FN, FN) TRACE_EVENT(kfree_skb, TP_PROTO(struct sk_buff *skb, void *location, - enum skb_drop_reason reason), + enum skb_drop_reason reason, struct sock *rx_sk), - TP_ARGS(skb, location, reason), + TP_ARGS(skb, location, reason, rx_sk), TP_STRUCT__entry( __field(void *, skbaddr) __field(void *, location) + __field(void *, rx_sk) __field(unsigned short, protocol) __field(enum skb_drop_reason, reason) ), @@ -38,12 +39,14 @@ TRACE_EVENT(kfree_skb, TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; + __entry->rx_sk = rx_sk; __entry->protocol = ntohs(skb->protocol); __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s", - __entry->skbaddr, __entry->protocol, __entry->location, + TP_printk("skbaddr=%p rx_sk=%p protocol=%u location=%pS reason: %s", + __entry->skbaddr, __entry->rx_sk, __entry->protocol, + __entry->location, __print_symbolic(__entry->reason, DEFINE_DROP_REASON(FN, FNe))) ); diff --git a/net/core/dev.c b/net/core/dev.c index c361a7b69da8..093d82bf0e28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5234,7 +5234,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) trace_consume_skb(skb, net_tx_action); else trace_kfree_skb(skb, net_tx_action, - get_kfree_skb_cb(skb)->reason); + get_kfree_skb_cb(skb)->reason, NULL); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 430ed18f8584..2e0ae3328232 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -109,7 +109,8 @@ static u32 net_dm_queue_len = 1000; struct net_dm_alert_ops { void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, void *location, - enum skb_drop_reason reason); + enum skb_drop_reason reason, + struct sock *rx_sk); void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, int work, int budget); void (*work_item_func)(struct work_struct *work); @@ -264,7 +265,8 @@ out: static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location, - enum skb_drop_reason reason) + enum skb_drop_reason reason, + struct sock *rx_sk) { trace_drop_common(skb, location); } @@ -491,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = { static void net_dm_packet_trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location, - enum skb_drop_reason reason) + enum skb_drop_reason reason, + struct sock *rx_sk) { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c8ac79851cd6..8973db4eabd4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1203,7 +1203,7 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) if (reason == SKB_CONSUMED) trace_consume_skb(skb, __builtin_return_address(0)); else - trace_kfree_skb(skb, __builtin_return_address(0), reason); + trace_kfree_skb(skb, __builtin_return_address(0), reason, NULL); return true; } -- cgit v1.2.3 From ba8de796baf4bdc03530774fb284fe3c97875566 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:09 -0700 Subject: net: introduce sk_skb_reason_drop function Long used destructors kfree_skb and kfree_skb_reason do not pass receiving socket to packet drop tracepoints trace_kfree_skb. This makes it hard to track packet drops of a certain netns (container) or a socket (user application). The naming of these destructors are also not consistent with most sk/skb operating functions, i.e. functions named "sk_xxx" or "skb_xxx". Introduce a new functions sk_skb_reason_drop as drop-in replacement for kfree_skb_reason on local receiving path. Callers can now pass receiving sockets to the tracepoints. kfree_skb and kfree_skb_reason are still usable but they are now just inline helpers that call sk_skb_reason_drop. Note it is not feasible to do the same to consume_skb. Packets not dropped can flow through multiple receive handlers, and have multiple receiving sockets. Leave it untouched for now. Suggested-by: Eric Dumazet Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++-- net/core/skbuff.c | 22 ++++++++++++---------- 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 813406a9bd6c..f4cda3fbdb75 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1251,8 +1251,14 @@ static inline bool skb_data_unref(const struct sk_buff *skb, return true; } -void __fix_address -kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); +void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason); + +static inline void +kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +{ + sk_skb_reason_drop(NULL, skb, reason); +} /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8973db4eabd4..2315c088e91d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1190,7 +1190,8 @@ void __kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(__kfree_skb); static __always_inline -bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +bool __sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason) { if (unlikely(!skb_unref(skb))) return false; @@ -1203,26 +1204,27 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) if (reason == SKB_CONSUMED) trace_consume_skb(skb, __builtin_return_address(0)); else - trace_kfree_skb(skb, __builtin_return_address(0), reason, NULL); + trace_kfree_skb(skb, __builtin_return_address(0), reason, sk); return true; } /** - * kfree_skb_reason - free an sk_buff with special reason + * sk_skb_reason_drop - free an sk_buff with special reason + * @sk: the socket to receive @skb, or NULL if not applicable * @skb: buffer to free * @reason: reason why this skb is dropped * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' - * tracepoint. + * Drop a reference to the buffer and free it if the usage count has hit + * zero. Meanwhile, pass the receiving socket and drop reason to + * 'kfree_skb' tracepoint. */ void __fix_address -kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason) { - if (__kfree_skb_reason(skb, reason)) + if (__sk_skb_reason_drop(sk, skb, reason)) __kfree_skb(skb); } -EXPORT_SYMBOL(kfree_skb_reason); +EXPORT_SYMBOL(sk_skb_reason_drop); #define KFREE_SKB_BULK_SIZE 16 @@ -1261,7 +1263,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason) while (segs) { struct sk_buff *next = segs->next; - if (__kfree_skb_reason(segs, reason)) { + if (__sk_skb_reason_drop(NULL, segs, reason)) { skb_poison_list(segs); kfree_skb_add_bulk(segs, &sa, reason); } -- cgit v1.2.3 From ba63a7e08523be3496cc1b7e5f77d306144a3a40 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 20 Apr 2024 21:47:46 +0200 Subject: can: isotp: remove ISO 15675-2 specification version where possible With the new ISO 15765-2:2024 release the former documentation and comments have to be reworked. This patch removes the ISO specification version/date where possible. Signed-off-by: Oliver Hartkopp Acked-by: Vincent Mailhol Acked-by: Francesco Valla Link: https://lore.kernel.org/all/20240420194746.4885-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 2 +- net/can/Kconfig | 11 +++++------ net/can/isotp.c | 11 ++++++----- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 6cde62371b6f..bd990917f7c4 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -2,7 +2,7 @@ /* * linux/can/isotp.h * - * Definitions for isotp CAN sockets (ISO 15765-2:2016) + * Definitions for ISO 15765-2 CAN transport protocol sockets * * Copyright (c) 2020 Volkswagen Group Electronic Research * All rights reserved. diff --git a/net/can/Kconfig b/net/can/Kconfig index cb56be8e3862..af64a6f76458 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -56,18 +56,17 @@ config CAN_GW source "net/can/j1939/Kconfig" config CAN_ISOTP - tristate "ISO 15765-2:2016 CAN transport protocol" + tristate "ISO 15765-2 CAN transport protocol" help CAN Transport Protocols offer support for segmented Point-to-Point communication between CAN nodes via two defined CAN Identifiers. + This protocol driver implements segmented data transfers for CAN CC + (aka Classical CAN, CAN 2.0B) and CAN FD frame types which were + introduced with ISO 15765-2:2016. As CAN frames can only transport a small amount of data bytes - (max. 8 bytes for 'classic' CAN and max. 64 bytes for CAN FD) this + (max. 8 bytes for CAN CC and max. 64 bytes for CAN FD) this segmentation is needed to transport longer Protocol Data Units (PDU) as needed e.g. for vehicle diagnosis (UDS, ISO 14229) or IP-over-CAN traffic. - This protocol driver implements data transfers according to - ISO 15765-2:2016 for 'classic' CAN and CAN FD frame types. - If you want to perform automotive vehicle diagnostic services (UDS), - say 'y'. endif diff --git a/net/can/isotp.c b/net/can/isotp.c index 25bac0fafc83..16046931542a 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -72,7 +72,7 @@ #include #include -MODULE_DESCRIPTION("PF_CAN isotp 15765-2:2016 protocol"); +MODULE_DESCRIPTION("PF_CAN ISO 15765-2 transport protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp "); MODULE_ALIAS("can-proto-6"); @@ -83,10 +83,11 @@ MODULE_ALIAS("can-proto-6"); (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -/* ISO 15765-2:2016 supports more than 4095 byte per ISO PDU as the FF_DL can - * take full 32 bit values (4 Gbyte). We would need some good concept to handle - * this between user space and kernel space. For now set the static buffer to - * something about 8 kbyte to be able to test this new functionality. +/* Since ISO 15765-2:2016 the CAN isotp protocol supports more than 4095 + * byte per ISO PDU as the FF_DL can take full 32 bit values (4 Gbyte). + * We would need some good concept to handle this between user space and + * kernel space. For now set the static buffer to something about 8 kbyte + * to be able to test this new functionality. */ #define DEFAULT_MAX_PDU_SIZE 8300 -- cgit v1.2.3 From 9919c5c98cb25dbf7e76aadb9beab55a2a25f830 Mon Sep 17 00:00:00 2001 From: Rafael Passos Date: Fri, 14 Jun 2024 23:24:08 -0300 Subject: bpf: remove unused parameter in bpf_jit_binary_pack_finalize Fixes a compiler warning. the bpf_jit_binary_pack_finalize function was taking an extra bpf_prog parameter that went unused. This removves it and updates the callers accordingly. Signed-off-by: Rafael Passos Link: https://lore.kernel.org/r/20240615022641.210320-2-rafael@rcpassos.me Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 3 +-- arch/powerpc/net/bpf_jit_comp.c | 4 ++-- arch/riscv/net/bpf_jit_core.c | 5 ++--- arch/x86/net/bpf_jit_comp.c | 4 ++-- include/linux/filter.h | 3 +-- kernel/bpf/core.c | 3 +-- 6 files changed, 9 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 720336d28856..6edaeafd1499 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1829,8 +1829,7 @@ skip_init_ctx: prog->jited_len = 0; goto out_free_hdr; } - if (WARN_ON(bpf_jit_binary_pack_finalize(prog, ro_header, - header))) { + if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { /* ro_header has been freed */ ro_header = NULL; prog = orig_prog; diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 984655419da5..2a36cc2e7e9e 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -225,7 +225,7 @@ skip_init_ctx: fp->jited_len = proglen + FUNCTION_DESCR_SIZE; if (!fp->is_func || extra_pass) { - if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) { + if (bpf_jit_binary_pack_finalize(fhdr, hdr)) { fp = org_fp; goto out_addrs; } @@ -348,7 +348,7 @@ void bpf_jit_free(struct bpf_prog *fp) * before freeing it. */ if (jit_data) { - bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr); + bpf_jit_binary_pack_finalize(jit_data->fhdr, jit_data->hdr); kvfree(jit_data->addrs); kfree(jit_data); } diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 0a96abdaca65..6de753c667f4 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -178,8 +178,7 @@ skip_init_ctx: prog->jited_len = prog_size - cfi_get_offset(); if (!prog->is_func || extra_pass) { - if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, - jit_data->header))) { + if (WARN_ON(bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header))) { /* ro_header has been freed */ jit_data->ro_header = NULL; prog = orig_prog; @@ -258,7 +257,7 @@ void bpf_jit_free(struct bpf_prog *prog) * before freeing it. */ if (jit_data) { - bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, jit_data->header); + bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header); kfree(jit_data); } hdr = bpf_jit_binary_pack_hdr(prog); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 7c130001fbfe..d25d81c8ecc0 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3356,7 +3356,7 @@ out_image: * * Both cases are serious bugs and justify WARN_ON. */ - if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) { + if (WARN_ON(bpf_jit_binary_pack_finalize(header, rw_header))) { /* header has been freed */ header = NULL; goto out_image; @@ -3435,7 +3435,7 @@ void bpf_jit_free(struct bpf_prog *prog) * before freeing it. */ if (jit_data) { - bpf_jit_binary_pack_finalize(prog, jit_data->header, + bpf_jit_binary_pack_finalize(jit_data->header, jit_data->rw_header); kvfree(jit_data->addrs); kfree(jit_data); diff --git a/include/linux/filter.h b/include/linux/filter.h index b02aea291b7e..dd41a93f06b2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1129,8 +1129,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, struct bpf_binary_header **rw_hdr, u8 **rw_image, bpf_jit_fill_hole_t bpf_fill_ill_insns); -int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, - struct bpf_binary_header *ro_header, +int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1a6c3faa6e4a..f6951c33790d 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1174,8 +1174,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, } /* Copy JITed text from rw_header to its final location, the ro_header. */ -int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, - struct bpf_binary_header *ro_header, +int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header) { void *ptr; -- cgit v1.2.3 From ab224b9ef7c4eaa752752455ea79bd7022209d5d Mon Sep 17 00:00:00 2001 From: Rafael Passos Date: Fri, 14 Jun 2024 23:24:09 -0300 Subject: bpf: remove unused parameter in __bpf_free_used_btfs Fixes a compiler warning. The __bpf_free_used_btfs function was taking an extra unused struct bpf_prog_aux *aux param Signed-off-by: Rafael Passos Link: https://lore.kernel.org/r/20240615022641.210320-3-rafael@rcpassos.me Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +-- kernel/bpf/core.c | 5 ++--- kernel/bpf/verifier.c | 3 +-- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f636b4998bf7..960780ef04e1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2933,8 +2933,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) return ret; } -void __bpf_free_used_btfs(struct bpf_prog_aux *aux, - struct btf_mod_pair *used_btfs, u32 len); +void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len); static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f6951c33790d..ae2e1eeda0d4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2742,8 +2742,7 @@ static void bpf_free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } -void __bpf_free_used_btfs(struct bpf_prog_aux *aux, - struct btf_mod_pair *used_btfs, u32 len) +void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len) { #ifdef CONFIG_BPF_SYSCALL struct btf_mod_pair *btf_mod; @@ -2760,7 +2759,7 @@ void __bpf_free_used_btfs(struct bpf_prog_aux *aux, static void bpf_free_used_btfs(struct bpf_prog_aux *aux) { - __bpf_free_used_btfs(aux, aux->used_btfs, aux->used_btf_cnt); + __bpf_free_used_btfs(aux->used_btfs, aux->used_btf_cnt); kfree(aux->used_btfs); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ffe98a788c33..3f6be4923655 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18694,8 +18694,7 @@ static void release_maps(struct bpf_verifier_env *env) /* drop refcnt of maps used by the rejected program */ static void release_btfs(struct bpf_verifier_env *env) { - __bpf_free_used_btfs(env->prog->aux, env->used_btfs, - env->used_btf_cnt); + __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt); } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ -- cgit v1.2.3 From d4e48e3dd45017abdd69a19285d197de897ef44f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 20 Jun 2024 10:17:29 +0100 Subject: module, bpf: Store BTF base pointer in struct module ...as this will allow split BTF modules with a base BTF representation (rather than the full vmlinux BTF at time of BTF encoding) to resolve their references to kernel types in a way that is more resilient to small changes in kernel types. This will allow modules that are not built every time the kernel is to provide more resilient BTF, rather than have it invalidated every time BTF ids for core kernel types change. Fields are ordered to avoid holes in struct module. Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Reviewed-by: Luis Chamberlain Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240620091733.1967885-3-alan.maguire@oracle.com --- include/linux/module.h | 2 ++ kernel/module/main.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index ffa1c603163c..b79d926cae8a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -509,7 +509,9 @@ struct module { #endif #ifdef CONFIG_DEBUG_INFO_BTF_MODULES unsigned int btf_data_size; + unsigned int btf_base_data_size; void *btf_data; + void *btf_base_data; #endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; diff --git a/kernel/module/main.c b/kernel/module/main.c index d18a94b973e1..d9592195c5bb 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -2166,6 +2166,8 @@ static int find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_DEBUG_INFO_BTF_MODULES mod->btf_data = any_section_objs(info, ".BTF", 1, &mod->btf_data_size); + mod->btf_base_data = any_section_objs(info, ".BTF.base", 1, + &mod->btf_base_data_size); #endif #ifdef CONFIG_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", @@ -2590,8 +2592,9 @@ static noinline int do_init_module(struct module *mod) } #ifdef CONFIG_DEBUG_INFO_BTF_MODULES - /* .BTF is not SHF_ALLOC and will get removed, so sanitize pointer */ + /* .BTF is not SHF_ALLOC and will get removed, so sanitize pointers */ mod->btf_data = NULL; + mod->btf_base_data = NULL; #endif /* * We want to free module_init, but be aware that kallsyms may be -- cgit v1.2.3 From 8646db238997df36c6ad71a9d7e0b52ceee221b2 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 20 Jun 2024 10:17:31 +0100 Subject: libbpf,bpf: Share BTF relocate-related code with kernel Share relocation implementation with the kernel. As part of this, we also need the type/string iteration functions so also share btf_iter.c file. Relocation code in kernel and userspace is identical save for the impementation of the reparenting of split BTF to the relocated base BTF and retrieval of the BTF header from "struct btf"; these small functions need separate user-space and kernel implementations for the separate "struct btf"s they operate upon. One other wrinkle on the kernel side is we have to map .BTF.ids in modules as they were generated with the type ids used at BTF encoding time. btf_relocate() optionally returns an array mapping from old BTF ids to relocated ids, so we use that to fix up these references where needed for kfuncs. Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240620091733.1967885-5-alan.maguire@oracle.com --- include/linux/btf.h | 64 ++++++++++++++++ kernel/bpf/Makefile | 8 +- kernel/bpf/btf.c | 176 ++++++++++++++++++++++++++++++------------- tools/lib/bpf/btf_iter.c | 8 ++ tools/lib/bpf/btf_relocate.c | 23 ++++++ 5 files changed, 226 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 56d91daacdba..d199fa17abb4 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -140,6 +140,7 @@ extern const struct file_operations btf_fops; const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); +const struct btf_header *btf_header(const struct btf *btf); int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); struct btf *btf_get_by_fd(int fd); int btf_get_info_by_fd(const struct btf *btf, @@ -212,8 +213,10 @@ int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); bool btf_is_kernel(const struct btf *btf); bool btf_is_module(const struct btf *btf); +bool btf_is_vmlinux(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); u32 btf_nr_types(const struct btf *btf); +struct btf *btf_base_btf(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); @@ -339,6 +342,11 @@ static inline u8 btf_int_offset(const struct btf_type *t) return BTF_INT_OFFSET(*(u32 *)(t + 1)); } +static inline __u8 btf_int_bits(const struct btf_type *t) +{ + return BTF_INT_BITS(*(__u32 *)(t + 1)); +} + static inline bool btf_type_is_scalar(const struct btf_type *t) { return btf_type_is_int(t) || btf_type_is_enum(t); @@ -478,6 +486,11 @@ static inline struct btf_param *btf_params(const struct btf_type *t) return (struct btf_param *)(t + 1); } +static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) +{ + return (struct btf_decl_tag *)(t + 1); +} + static inline int btf_id_cmp_func(const void *a, const void *b) { const int *pa = a, *pb = b; @@ -515,9 +528,38 @@ static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf * } #endif +enum btf_field_iter_kind { + BTF_FIELD_ITER_IDS, + BTF_FIELD_ITER_STRS, +}; + +struct btf_field_desc { + /* once-per-type offsets */ + int t_off_cnt, t_offs[2]; + /* member struct size, or zero, if no members */ + int m_sz; + /* repeated per-member offsets */ + int m_off_cnt, m_offs[1]; +}; + +struct btf_field_iter { + struct btf_field_desc desc; + void *p; + int m_idx; + int off_idx; + int vlen; +}; + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids); +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind); +__u32 *btf_field_iter_next(struct btf_field_iter *it); + const char *btf_name_by_offset(const struct btf *btf, u32 offset); +const char *btf_str_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, @@ -544,6 +586,28 @@ static inline const struct btf_type *btf_type_by_id(const struct btf *btf, { return NULL; } + +static inline void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ +} + +static inline int btf_relocate(void *log, struct btf *btf, const struct btf *base_btf, + __u32 **map_ids) +{ + return -EOPNOTSUPP; +} + +static inline int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind) +{ + return -EOPNOTSUPP; +} + +static inline __u32 *btf_field_iter_next(struct btf_field_iter *it) +{ + return NULL; +} + static inline const char *btf_name_by_offset(const struct btf *btf, u32 offset) { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 7eb9ad3a3ae6..0291eef9ce92 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -50,5 +50,11 @@ endif obj-$(CONFIG_BPF_PRELOAD) += preload/ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o -$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE +obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o +obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o + +# Some source files are common to libbpf. +vpath %.c $(srctree)/kernel/bpf:$(srctree)/tools/lib/bpf + +$(obj)/%.o: %.c FORCE $(call if_changed_rule,cc_o_c) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ce4707968217..8e12cb80ba73 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -274,6 +274,7 @@ struct btf { u32 start_str_off; /* first string offset (0 for base BTF) */ char name[MODULE_NAME_LEN]; bool kernel_btf; + __u32 *base_id_map; /* map from distilled base BTF -> vmlinux BTF ids */ }; enum verifier_phase { @@ -530,6 +531,11 @@ static bool btf_type_is_decl_tag_target(const struct btf_type *t) btf_type_is_var(t) || btf_type_is_typedef(t); } +bool btf_is_vmlinux(const struct btf *btf) +{ + return btf->kernel_btf && !btf->base_btf; +} + u32 btf_nr_types(const struct btf *btf) { u32 total = 0; @@ -772,7 +778,7 @@ static bool __btf_name_char_ok(char c, bool first) return true; } -static const char *btf_str_by_offset(const struct btf *btf, u32 offset) +const char *btf_str_by_offset(const struct btf *btf, u32 offset) { while (offset < btf->start_str_off) btf = btf->base_btf; @@ -1670,14 +1676,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf) if (!tab) return; - /* For module BTF, we directly assign the sets being registered, so - * there is nothing to free except kfunc_set_tab. - */ - if (btf_is_module(btf)) - goto free_tab; for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) kfree(tab->sets[hook]); -free_tab: kfree(tab); btf->kfunc_set_tab = NULL; } @@ -1735,7 +1735,12 @@ static void btf_free(struct btf *btf) kvfree(btf->types); kvfree(btf->resolved_sizes); kvfree(btf->resolved_ids); - kvfree(btf->data); + /* vmlinux does not allocate btf->data, it simply points it at + * __start_BTF. + */ + if (!btf_is_vmlinux(btf)) + kvfree(btf->data); + kvfree(btf->base_id_map); kfree(btf); } @@ -1764,6 +1769,23 @@ void btf_put(struct btf *btf) } } +struct btf *btf_base_btf(const struct btf *btf) +{ + return btf->base_btf; +} + +const struct btf_header *btf_header(const struct btf *btf) +{ + return &btf->hdr; +} + +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ + btf->base_btf = (struct btf *)base_btf; + btf->start_id = btf_nr_types(base_btf); + btf->start_str_off = base_btf->hdr.str_len; +} + static int env_resolve_init(struct btf_verifier_env *env) { struct btf *btf = env->btf; @@ -6083,23 +6105,15 @@ int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_ty BTF_ID_LIST(bpf_ctx_convert_btf_id) BTF_ID(struct, bpf_ctx_convert) -struct btf *btf_parse_vmlinux(void) +static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name, + void *data, unsigned int data_size) { - struct btf_verifier_env *env = NULL; - struct bpf_verifier_log *log; struct btf *btf = NULL; int err; if (!IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) return ERR_PTR(-ENOENT); - env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); - if (!env) - return ERR_PTR(-ENOMEM); - - log = &env->log; - log->level = BPF_LOG_KERNEL; - btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; @@ -6107,10 +6121,10 @@ struct btf *btf_parse_vmlinux(void) } env->btf = btf; - btf->data = __start_BTF; - btf->data_size = __stop_BTF - __start_BTF; + btf->data = data; + btf->data_size = data_size; btf->kernel_btf = true; - snprintf(btf->name, sizeof(btf->name), "vmlinux"); + snprintf(btf->name, sizeof(btf->name), "%s", name); err = btf_parse_hdr(env); if (err) @@ -6130,20 +6144,11 @@ struct btf *btf_parse_vmlinux(void) if (err) goto errout; - /* btf_parse_vmlinux() runs under bpf_verifier_lock */ - bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); - refcount_set(&btf->refcnt, 1); - err = btf_alloc_id(btf); - if (err) - goto errout; - - btf_verifier_env_free(env); return btf; errout: - btf_verifier_env_free(env); if (btf) { kvfree(btf->types); kfree(btf); @@ -6151,19 +6156,61 @@ errout: return ERR_PTR(err); } +struct btf *btf_parse_vmlinux(void) +{ + struct btf_verifier_env *env = NULL; + struct bpf_verifier_log *log; + struct btf *btf; + int err; + + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); + if (!env) + return ERR_PTR(-ENOMEM); + + log = &env->log; + log->level = BPF_LOG_KERNEL; + btf = btf_parse_base(env, "vmlinux", __start_BTF, __stop_BTF - __start_BTF); + if (IS_ERR(btf)) + goto err_out; + + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ + bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); + err = btf_alloc_id(btf); + if (err) { + btf_free(btf); + btf = ERR_PTR(err); + } +err_out: + btf_verifier_env_free(env); + return btf; +} + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES -static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size) +/* If .BTF_ids section was created with distilled base BTF, both base and + * split BTF ids will need to be mapped to actual base/split ids for + * BTF now that it has been relocated. + */ +static __u32 btf_relocate_id(const struct btf *btf, __u32 id) +{ + if (!btf->base_btf || !btf->base_id_map) + return id; + return btf->base_id_map[id]; +} + +static struct btf *btf_parse_module(const char *module_name, const void *data, + unsigned int data_size, void *base_data, + unsigned int base_data_size) { + struct btf *btf = NULL, *vmlinux_btf, *base_btf = NULL; struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; - struct btf *btf = NULL, *base_btf; - int err; + int err = 0; - base_btf = bpf_get_btf_vmlinux(); - if (IS_ERR(base_btf)) - return base_btf; - if (!base_btf) + vmlinux_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(vmlinux_btf)) + return vmlinux_btf; + if (!vmlinux_btf) return ERR_PTR(-EINVAL); env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); @@ -6173,6 +6220,16 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u log = &env->log; log->level = BPF_LOG_KERNEL; + if (base_data) { + base_btf = btf_parse_base(env, ".BTF.base", base_data, base_data_size); + if (IS_ERR(base_btf)) { + err = PTR_ERR(base_btf); + goto errout; + } + } else { + base_btf = vmlinux_btf; + } + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; @@ -6212,12 +6269,22 @@ static struct btf *btf_parse_module(const char *module_name, const void *data, u if (err) goto errout; + if (base_btf != vmlinux_btf) { + err = btf_relocate(btf, vmlinux_btf, &btf->base_id_map); + if (err) + goto errout; + btf_free(base_btf); + base_btf = vmlinux_btf; + } + btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; errout: btf_verifier_env_free(env); + if (base_btf != vmlinux_btf) + btf_free(base_btf); if (btf) { kvfree(btf->data); kvfree(btf->types); @@ -7770,7 +7837,8 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op, err = -ENOMEM; goto out; } - btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size); + btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size, + mod->btf_base_data, mod->btf_base_data_size); if (IS_ERR(btf)) { kfree(btf_mod); if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) { @@ -8094,7 +8162,7 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, bool add_filter = !!kset->filter; struct btf_kfunc_set_tab *tab; struct btf_id_set8 *set; - u32 set_cnt; + u32 set_cnt, i; int ret; if (hook >= BTF_KFUNC_HOOK_MAX) { @@ -8140,21 +8208,15 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, goto end; } - /* We don't need to allocate, concatenate, and sort module sets, because - * only one is allowed per hook. Hence, we can directly assign the - * pointer and return. - */ - if (!vmlinux_set) { - tab->sets[hook] = add_set; - goto do_add_filter; - } - /* In case of vmlinux sets, there may be more than one set being * registered per hook. To create a unified set, we allocate a new set * and concatenate all individual sets being registered. While each set * is individually sorted, they may become unsorted when concatenated, * hence re-sorting the final set again is required to make binary * searching the set using btf_id_set8_contains function work. + * + * For module sets, we need to allocate as we may need to relocate + * BTF ids. */ set_cnt = set ? set->cnt : 0; @@ -8184,11 +8246,14 @@ static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, /* Concatenate the two sets */ memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); + /* Now that the set is copied, update with relocated BTF ids */ + for (i = set->cnt; i < set->cnt + add_set->cnt; i++) + set->pairs[i].id = btf_relocate_id(btf, set->pairs[i].id); + set->cnt += add_set->cnt; sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); -do_add_filter: if (add_filter) { hook_filter = &tab->hook_filters[hook]; hook_filter->filters[hook_filter->nr_filters++] = kset->filter; @@ -8308,7 +8373,7 @@ static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, return PTR_ERR(btf); for (i = 0; i < kset->set->cnt; i++) { - ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id, + ret = btf_check_kfunc_protos(btf, btf_relocate_id(btf, kset->set->pairs[i].id), kset->set->pairs[i].flags); if (ret) goto err_out; @@ -8372,7 +8437,7 @@ static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc u32 nr_args, i; for (i = 0; i < cnt; i++) { - dtor_btf_id = dtors[i].kfunc_btf_id; + dtor_btf_id = btf_relocate_id(btf, dtors[i].kfunc_btf_id); dtor_func = btf_type_by_id(btf, dtor_btf_id); if (!dtor_func || !btf_type_is_func(dtor_func)) @@ -8407,7 +8472,7 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c { struct btf_id_dtor_kfunc_tab *tab; struct btf *btf; - u32 tab_cnt; + u32 tab_cnt, i; int ret; btf = btf_get_module_btf(owner); @@ -8458,6 +8523,13 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c btf->dtor_kfunc_tab = tab; memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0])); + + /* remap BTF ids based on BTF relocation (if any) */ + for (i = tab_cnt; i < tab_cnt + add_cnt; i++) { + tab->dtors[i].btf_id = btf_relocate_id(btf, tab->dtors[i].btf_id); + tab->dtors[i].kfunc_btf_id = btf_relocate_id(btf, tab->dtors[i].kfunc_btf_id); + } + tab->cnt += add_cnt; sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); diff --git a/tools/lib/bpf/btf_iter.c b/tools/lib/bpf/btf_iter.c index c308aa60285d..9a6c822c2294 100644 --- a/tools/lib/bpf/btf_iter.c +++ b/tools/lib/bpf/btf_iter.c @@ -2,8 +2,16 @@ /* Copyright (c) 2021 Facebook */ /* Copyright (c) 2024, Oracle and/or its affiliates. */ +#ifdef __KERNEL__ +#include +#include + +#define btf_var_secinfos(t) (struct btf_var_secinfo *)btf_type_var_secinfo(t) + +#else #include "btf.h" #include "libbpf_internal.h" +#endif int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind) diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c index 23a41fb03e0d..2281dbbafa11 100644 --- a/tools/lib/bpf/btf_relocate.c +++ b/tools/lib/bpf/btf_relocate.c @@ -5,11 +5,34 @@ #define _GNU_SOURCE #endif +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include + +#define btf_type_by_id (struct btf_type *)btf_type_by_id +#define btf__type_cnt btf_nr_types +#define btf__base_btf btf_base_btf +#define btf__name_by_offset btf_name_by_offset +#define btf__str_by_offset btf_str_by_offset +#define btf_kflag btf_type_kflag + +#define calloc(nmemb, sz) kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN) +#define free(ptr) kvfree(ptr) +#define qsort(base, num, sz, cmp) sort(base, num, sz, cmp, NULL) + +#else + #include "btf.h" #include "bpf.h" #include "libbpf.h" #include "libbpf_internal.h" +#endif /* __KERNEL__ */ + struct btf; struct btf_relocate { -- cgit v1.2.3 From 07e4fd4c0592ad57fe4c5033393d30362dbfaa5d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:51 +0200 Subject: locking/local_lock: Introduce guard definition for local_lock. Introduce lock guard definition for local_lock_t. There are no users yet. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-2-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/local_lock.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index e55010fa7329..82366a37f447 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -51,4 +51,15 @@ #define local_unlock_irqrestore(lock, flags) \ __local_unlock_irqrestore(lock, flags) +DEFINE_GUARD(local_lock, local_lock_t __percpu*, + local_lock(_T), + local_unlock(_T)) +DEFINE_GUARD(local_lock_irq, local_lock_t __percpu*, + local_lock_irq(_T), + local_unlock_irq(_T)) +DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu, + local_lock_irqsave(_T->lock, _T->flags), + local_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #endif -- cgit v1.2.3 From c5bcab7558220bbad8bd4863576afd1b340ce29f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:52 +0200 Subject: locking/local_lock: Add local nested BH locking infrastructure. Add local_lock_nested_bh() locking. It is based on local_lock_t and the naming follows the preempt_disable_nested() example. For !PREEMPT_RT + !LOCKDEP it is a per-CPU annotation for locking assumptions based on local_bh_disable(). The macro is optimized away during compilation. For !PREEMPT_RT + LOCKDEP the local_lock_nested_bh() is reduced to the usual lock-acquire plus lockdep_assert_in_softirq() - ensuring that BH is disabled. For PREEMPT_RT local_lock_nested_bh() acquires the specified per-CPU lock. It does not disable CPU migration because it relies on local_bh_disable() disabling CPU migration. With LOCKDEP it performans the usual lockdep checks as with !PREEMPT_RT. Due to include hell the softirq check has been moved spinlock.c. The intention is to use this locking in places where locking of a per-CPU variable relies on BH being disabled. Instead of treating disabled bottom halves as a big per-CPU lock, PREEMPT_RT can use this to reduce the locking scope to what actually needs protecting. A side effect is that it also documents the protection scope of the per-CPU variables. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-3-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/local_lock.h | 10 ++++++++++ include/linux/local_lock_internal.h | 31 +++++++++++++++++++++++++++++++ include/linux/lockdep.h | 3 +++ kernel/locking/spinlock.c | 8 ++++++++ 4 files changed, 52 insertions(+) (limited to 'include') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 82366a37f447..091dc0b6bdfb 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -62,4 +62,14 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu, local_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +#define local_lock_nested_bh(_lock) \ + __local_lock_nested_bh(_lock) + +#define local_unlock_nested_bh(_lock) \ + __local_unlock_nested_bh(_lock) + +DEFINE_GUARD(local_lock_nested_bh, local_lock_t __percpu*, + local_lock_nested_bh(_T), + local_unlock_nested_bh(_T)) + #endif diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 975e33b793a7..8dd71fbbb6d2 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -62,6 +62,17 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __spinlock_nested_bh_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ + lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, \ + 0, LD_WAIT_CONFIG, LD_WAIT_INV, \ + LD_LOCK_NORMAL); \ + local_lock_debug_init(lock); \ +} while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ @@ -98,6 +109,15 @@ do { \ local_irq_restore(flags); \ } while (0) +#define __local_lock_nested_bh(lock) \ + do { \ + lockdep_assert_in_softirq(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock_nested_bh(lock) \ + local_lock_release(this_cpu_ptr(lock)) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -138,4 +158,15 @@ typedef spinlock_t local_lock_t; #define __local_unlock_irqrestore(lock, flags) __local_unlock(lock) +#define __local_lock_nested_bh(lock) \ +do { \ + lockdep_assert_in_softirq_func(); \ + spin_lock(this_cpu_ptr(lock)); \ +} while (0) + +#define __local_unlock_nested_bh(lock) \ +do { \ + spin_unlock(this_cpu_ptr((lock))); \ +} while (0) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 08b0d1d9d78b..3f5a551579cc 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -600,6 +600,8 @@ do { \ (!in_softirq() || in_irq() || in_nmi())); \ } while (0) +extern void lockdep_assert_in_softirq_func(void); + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) @@ -613,6 +615,7 @@ do { \ # define lockdep_assert_preemption_enabled() do { } while (0) # define lockdep_assert_preemption_disabled() do { } while (0) # define lockdep_assert_in_softirq() do { } while (0) +# define lockdep_assert_in_softirq_func() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 8475a0794f8c..438c6086d540 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -413,3 +413,11 @@ notrace int in_lock_functions(unsigned long addr) && addr < (unsigned long)__lock_text_end; } EXPORT_SYMBOL(in_lock_functions); + +#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_PREEMPT_RT) +void notrace lockdep_assert_in_softirq_func(void) +{ + lockdep_assert_in_softirq(); +} +EXPORT_SYMBOL(lockdep_assert_in_softirq_func); +#endif -- cgit v1.2.3 From ebad6d0334793f16a16e5320182f665292a31e0c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:56 +0200 Subject: net/ipv4: Use nested-BH locking for ipv4_tcp_sk. ipv4_tcp_sk is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Make a struct with a sock member (original ipv4_tcp_sk) and a local_lock_t and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Cc: David Ahern Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-7-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/net/sock.h | 5 +++++ net/ipv4/tcp_ipv4.c | 15 +++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index b30ea0c342a6..cce23ac4d514 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -544,6 +544,11 @@ struct sock { netns_tracker ns_tracker; }; +struct sock_bh_locked { + struct sock *sock; + local_lock_t bh_lock; +}; + enum sk_pacing { SK_PACING_NONE = 0, SK_PACING_NEEDED = 1, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8e49d69279d5..fd17f25ff288 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -93,7 +93,9 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); +static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static u32 tcp_v4_init_seq(const struct sk_buff *skb) { @@ -882,7 +884,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(ipv4_tcp_sk); + local_lock_nested_bh(&ipv4_tcp_sk.bh_lock); + ctl_sk = this_cpu_read(ipv4_tcp_sk.sock); + sock_net_set(ctl_sk, net); if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? @@ -907,6 +911,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); + local_unlock_nested_bh(&ipv4_tcp_sk.bh_lock); local_bh_enable(); #ifdef CONFIG_TCP_MD5SIG @@ -1002,7 +1007,8 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = this_cpu_read(ipv4_tcp_sk); + local_lock_nested_bh(&ipv4_tcp_sk.bh_lock); + ctl_sk = this_cpu_read(ipv4_tcp_sk.sock); sock_net_set(ctl_sk, net); ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark); @@ -1017,6 +1023,7 @@ static void tcp_v4_send_ack(const struct sock *sk, sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + local_unlock_nested_bh(&ipv4_tcp_sk.bh_lock); local_bh_enable(); } @@ -3615,7 +3622,7 @@ void __init tcp_v4_init(void) sk->sk_clockid = CLOCK_MONOTONIC; - per_cpu(ipv4_tcp_sk, cpu) = sk; + per_cpu(ipv4_tcp_sk.sock, cpu) = sk; } if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); -- cgit v1.2.3 From ecefbc09e8ee768ae85b7bb7a1de8c8287397d68 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:58 +0200 Subject: net: softnet_data: Make xmit per task. Softirq is preemptible on PREEMPT_RT. Without a per-CPU lock in local_bh_disable() there is no guarantee that only one device is transmitting at a time. With preemption and multiple senders it is possible that the per-CPU `recursion' counter gets incremented by different threads and exceeds XMIT_RECURSION_LIMIT leading to a false positive recursion alert. The `more' member is subject to similar problems if set by one thread for one driver and wrongly used by another driver within another thread. Instead of adding a lock to protect the per-CPU variable it is simpler to make xmit per-task. Sending and receiving skbs happens always in thread context anyway. Having a lock to protected the per-CPU counter would block/ serialize two sending threads needlessly. It would also require a recursive lock to ensure that the owner can increment the counter further. Make the softnet_data.xmit a task_struct member on PREEMPT_RT. Add needed wrapper. Cc: Ben Segall Cc: Daniel Bristot de Oliveira Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Mel Gorman Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-9-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 42 +++++++++++++++++++++++++++++++----------- include/linux/netdevice_xmit.h | 13 +++++++++++++ include/linux/sched.h | 5 ++++- net/core/dev.c | 14 ++++++++++++++ net/core/dev.h | 18 ++++++++++++++++++ 5 files changed, 80 insertions(+), 12 deletions(-) create mode 100644 include/linux/netdevice_xmit.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c83b390191d4..f6fc9066147d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -3223,13 +3224,7 @@ struct softnet_data { struct sk_buff_head xfrm_backlog; #endif /* written and read only by owning cpu: */ - struct { - u16 recursion; - u8 more; -#ifdef CONFIG_NET_EGRESS - u8 skip_txqueue; -#endif - } xmit; + struct netdev_xmit xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line. @@ -3257,10 +3252,18 @@ struct softnet_data { DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +#ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } +#else +static inline int dev_recursion_level(void) +{ + return current->net_xmit.recursion; +} + +#endif void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); @@ -4872,18 +4875,35 @@ static inline ktime_t netdev_get_tstamp(struct net_device *dev, return hwtstamps->hwtstamp; } -static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, - struct sk_buff *skb, struct net_device *dev, - bool more) +#ifndef CONFIG_PREEMPT_RT +static inline void netdev_xmit_set_more(bool more) { __this_cpu_write(softnet_data.xmit.more, more); - return ops->ndo_start_xmit(skb, dev); } static inline bool netdev_xmit_more(void) { return __this_cpu_read(softnet_data.xmit.more); } +#else +static inline void netdev_xmit_set_more(bool more) +{ + current->net_xmit.more = more; +} + +static inline bool netdev_xmit_more(void) +{ + return current->net_xmit.more; +} +#endif + +static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, + struct sk_buff *skb, struct net_device *dev, + bool more) +{ + netdev_xmit_set_more(more); + return ops->ndo_start_xmit(skb, dev); +} static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h new file mode 100644 index 000000000000..38325e070296 --- /dev/null +++ b/include/linux/netdevice_xmit.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_NETDEVICE_XMIT_H +#define _LINUX_NETDEVICE_XMIT_H + +struct netdev_xmit { + u16 recursion; + u8 more; +#ifdef CONFIG_NET_EGRESS + u8 skip_txqueue; +#endif +}; + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..5187486c2522 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -975,7 +976,9 @@ struct task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif - +#ifdef CONFIG_PREEMPT_RT + struct netdev_xmit net_xmit; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ struct restart_block restart_block; diff --git a/net/core/dev.c b/net/core/dev.c index 093d82bf0e28..95b9e4cc1767 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3940,6 +3940,7 @@ netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb) return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm)); } +#ifndef CONFIG_PREEMPT_RT static bool netdev_xmit_txqueue_skipped(void) { return __this_cpu_read(softnet_data.xmit.skip_txqueue); @@ -3950,6 +3951,19 @@ void netdev_xmit_skip_txqueue(bool skip) __this_cpu_write(softnet_data.xmit.skip_txqueue, skip); } EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); + +#else +static bool netdev_xmit_txqueue_skipped(void) +{ + return current->net_xmit.skip_txqueue; +} + +void netdev_xmit_skip_txqueue(bool skip) +{ + current->net_xmit.skip_txqueue = skip; +} +EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); +#endif #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS diff --git a/net/core/dev.h b/net/core/dev.h index 58f88d28bc99..5654325c5b71 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -150,6 +150,8 @@ struct napi_struct *napi_by_id(unsigned int napi_id); void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu); #define XMIT_RECURSION_LIMIT 8 + +#ifndef CONFIG_PREEMPT_RT static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > @@ -165,6 +167,22 @@ static inline void dev_xmit_recursion_dec(void) { __this_cpu_dec(softnet_data.xmit.recursion); } +#else +static inline bool dev_xmit_recursion(void) +{ + return unlikely(current->net_xmit.recursion > XMIT_RECURSION_LIMIT); +} + +static inline void dev_xmit_recursion_inc(void) +{ + current->net_xmit.recursion++; +} + +static inline void dev_xmit_recursion_dec(void) +{ + current->net_xmit.recursion--; +} +#endif int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, -- cgit v1.2.3 From b22800f9d3b142bf2550dd47ff738b9feedc1093 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:00 +0200 Subject: dev: Use nested-BH locking for softnet_data.process_queue. softnet_data::process_queue is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. softnet_data::input_queue_head can be updated lockless. This is fine because this value is only update CPU local by the local backlog_napi thread. Add a local_lock_t to softnet_data and use local_lock_nested_bh() for locking of process_queue. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-11-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f6fc9066147d..4e81660b4462 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3202,6 +3202,7 @@ static inline bool dev_has_header(const struct net_device *dev) struct softnet_data { struct list_head poll_list; struct sk_buff_head process_queue; + local_lock_t process_queue_bh_lock; /* stats */ unsigned int processed; diff --git a/net/core/dev.c b/net/core/dev.c index 73c4d14e4feb..8ef727c2ae2b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -449,7 +449,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain); * queue in the local softnet handler. */ -DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data) = { + .process_queue_bh_lock = INIT_LOCAL_LOCK(process_queue_bh_lock), +}; EXPORT_PER_CPU_SYMBOL(softnet_data); /* Page_pool has a lockless array/stack to alloc/recycle pages. @@ -5949,6 +5951,7 @@ static void flush_backlog(struct work_struct *work) } backlog_unlock_irq_enable(sd); + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); @@ -5956,6 +5959,7 @@ static void flush_backlog(struct work_struct *work) rps_input_queue_head_incr(sd); } } + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); local_bh_enable(); } @@ -6077,7 +6081,9 @@ static int process_backlog(struct napi_struct *napi, int quota) while (again) { struct sk_buff *skb; + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); while ((skb = __skb_dequeue(&sd->process_queue))) { + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); rcu_read_lock(); __netif_receive_skb(skb); rcu_read_unlock(); @@ -6086,7 +6092,9 @@ static int process_backlog(struct napi_struct *napi, int quota) return work; } + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); } + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); backlog_lock_irq_disable(sd); if (skb_queue_empty(&sd->input_pkt_queue)) { @@ -6101,8 +6109,10 @@ static int process_backlog(struct napi_struct *napi, int quota) napi->state &= NAPIF_STATE_THREADED; again = false; } else { + local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_splice_tail_init(&sd->input_pkt_queue, &sd->process_queue); + local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); } backlog_unlock_irq_enable(sd); } -- cgit v1.2.3 From d1542d4ae4dfdc47c9b3205ebe849ed23af213dd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:02 +0200 Subject: seg6: Use nested-BH locking for seg6_bpf_srh_states. The access to seg6_bpf_srh_states is protected by disabling preemption. Based on the code, the entry point is input_action_end_bpf() and every other function (the bpf helper functions bpf_lwt_seg6_*()), that is accessing seg6_bpf_srh_states, should be called from within input_action_end_bpf(). input_action_end_bpf() accesses seg6_bpf_srh_states first at the top of the function and then disables preemption. This looks wrong because if preemption needs to be disabled as part of the locking mechanism then the variable shouldn't be accessed beforehand. Looking at how it is used via test_lwt_seg6local.sh then input_action_end_bpf() is always invoked from softirq context. If this is always the case then the preempt_disable() statement is superfluous. If this is not always invoked from softirq then disabling only preemption is not sufficient. Replace the preempt_disable() statement with nested-BH locking. This is not an equivalent replacement as it assumes that the invocation of input_action_end_bpf() always occurs in softirq context and thus the preempt_disable() is superfluous. Add a local_lock_t the data structure and use local_lock_nested_bh() for locking. Add lockdep_assert_held() to ensure the lock is held while the per-CPU variable is referenced in the helper functions. Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: David Ahern Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-13-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/net/seg6_local.h | 1 + net/core/filter.c | 3 +++ net/ipv6/seg6_local.c | 22 ++++++++++++++-------- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h index 3fab9dec2ec4..888c1ce6f527 100644 --- a/include/net/seg6_local.h +++ b/include/net/seg6_local.h @@ -19,6 +19,7 @@ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb); struct seg6_bpf_srh_state { + local_lock_t bh_lock; struct ipv6_sr_hdr *srh; u16 hdrlen; bool valid; diff --git a/net/core/filter.c b/net/core/filter.c index b077e7467946..c4f5db324f04 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6455,6 +6455,7 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset, void *srh_tlvs, *srh_end, *ptr; int srhoff = 0; + lockdep_assert_held(&srh_state->bh_lock); if (srh == NULL) return -EINVAL; @@ -6511,6 +6512,7 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb, int hdroff = 0; int err; + lockdep_assert_held(&srh_state->bh_lock); switch (action) { case SEG6_LOCAL_ACTION_END_X: if (!seg6_bpf_has_valid_srh(skb)) @@ -6587,6 +6589,7 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset, int srhoff = 0; int ret; + lockdep_assert_held(&srh_state->bh_lock); if (unlikely(srh == NULL)) return -EINVAL; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index c434940131b1..c74705ead984 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -1380,7 +1380,9 @@ drop: return err; } -DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); +DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; bool seg6_bpf_has_valid_srh(struct sk_buff *skb) { @@ -1388,6 +1390,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb) this_cpu_ptr(&seg6_bpf_srh_states); struct ipv6_sr_hdr *srh = srh_state->srh; + lockdep_assert_held(&srh_state->bh_lock); if (unlikely(srh == NULL)) return false; @@ -1408,8 +1411,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb) static int input_action_end_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - struct seg6_bpf_srh_state *srh_state = - this_cpu_ptr(&seg6_bpf_srh_states); + struct seg6_bpf_srh_state *srh_state; struct ipv6_sr_hdr *srh; int ret; @@ -1420,10 +1422,14 @@ static int input_action_end_bpf(struct sk_buff *skb, } advance_nextseg(srh, &ipv6_hdr(skb)->daddr); - /* preempt_disable is needed to protect the per-CPU buffer srh_state, - * which is also accessed by the bpf_lwt_seg6_* helpers + /* The access to the per-CPU buffer srh_state is protected by running + * always in softirq context (with disabled BH). On PREEMPT_RT the + * required locking is provided by the following local_lock_nested_bh() + * statement. It is also accessed by the bpf_lwt_seg6_* helpers via + * bpf_prog_run_save_cb(). */ - preempt_disable(); + local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock); + srh_state = this_cpu_ptr(&seg6_bpf_srh_states); srh_state->srh = srh; srh_state->hdrlen = srh->hdrlen << 3; srh_state->valid = true; @@ -1446,15 +1452,15 @@ static int input_action_end_bpf(struct sk_buff *skb, if (srh_state->srh && !seg6_bpf_has_valid_srh(skb)) goto drop; + local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock); - preempt_enable(); if (ret != BPF_REDIRECT) seg6_lookup_nexthop(skb, NULL, 0); return dst_input(skb); drop: - preempt_enable(); + local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock); kfree_skb(skb); return -EINVAL; } -- cgit v1.2.3 From 401cb7dae8130fd34eb84648e02ab4c506df7d5e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:04 +0200 Subject: net: Reference bpf_redirect_info via task_struct on PREEMPT_RT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XDP redirect process is two staged: - bpf_prog_run_xdp() is invoked to run a eBPF program which inspects the packet and makes decisions. While doing that, the per-CPU variable bpf_redirect_info is used. - Afterwards xdp_do_redirect() is invoked and accesses bpf_redirect_info and it may also access other per-CPU variables like xskmap_flush_list. At the very end of the NAPI callback, xdp_do_flush() is invoked which does not access bpf_redirect_info but will touch the individual per-CPU lists. The per-CPU variables are only used in the NAPI callback hence disabling bottom halves is the only protection mechanism. Users from preemptible context (like cpu_map_kthread_run()) explicitly disable bottom halves for protections reasons. Without locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. PREEMPT_RT has forced-threaded interrupts enabled and every NAPI-callback runs in a thread. If each thread has its own data structure then locking can be avoided. Create a struct bpf_net_context which contains struct bpf_redirect_info. Define the variable on stack, use bpf_net_ctx_set() to save a pointer to it, bpf_net_ctx_clear() removes it again. The bpf_net_ctx_set() may nest. For instance a function can be used from within NET_RX_SOFTIRQ/ net_rx_action which uses bpf_net_ctx_set() and NET_TX_SOFTIRQ which does not. Therefore only the first invocations updates the pointer. Use bpf_net_ctx_get_ri() as a wrapper to retrieve the current struct bpf_redirect_info. The returned data structure is zero initialized to ensure nothing is leaked from stack. This is done on first usage of the struct. bpf_net_ctx_set() sets bpf_redirect_info::kern_flags to 0 to note that initialisation is required. First invocation of bpf_net_ctx_get_ri() will memset() the data structure and update bpf_redirect_info::kern_flags. bpf_redirect_info::nh is excluded from memset because it is only used once BPF_F_NEIGH is set which also sets the nh member. The kern_flags is moved past nh to exclude it from memset. The pointer to bpf_net_context is saved task's task_struct. Using always the bpf_net_context approach has the advantage that there is almost zero differences between PREEMPT_RT and non-PREEMPT_RT builds. Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-15-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 56 +++++++++++++++++++++++++++++++++++++++++--------- include/linux/sched.h | 3 +++ kernel/bpf/cpumap.c | 3 +++ kernel/bpf/devmap.c | 9 +++++++- kernel/fork.c | 1 + net/bpf/test_run.c | 11 +++++++++- net/core/dev.c | 29 +++++++++++++++++++++++++- net/core/filter.c | 44 +++++++++++---------------------------- net/core/lwt_bpf.c | 3 +++ 9 files changed, 114 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b02aea291b7e..0a7f6e4a00b6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -733,21 +733,59 @@ struct bpf_nh_params { }; }; +/* flags for bpf_redirect_info kern_flags */ +#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ +#define BPF_RI_F_RI_INIT BIT(1) + struct bpf_redirect_info { u64 tgt_index; void *tgt_value; struct bpf_map *map; u32 flags; - u32 kern_flags; u32 map_id; enum bpf_map_type map_type; struct bpf_nh_params nh; + u32 kern_flags; }; -DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); +struct bpf_net_context { + struct bpf_redirect_info ri; +}; -/* flags for bpf_redirect_info kern_flags */ -#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ +static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx) +{ + struct task_struct *tsk = current; + + if (tsk->bpf_net_context != NULL) + return NULL; + bpf_net_ctx->ri.kern_flags = 0; + + tsk->bpf_net_context = bpf_net_ctx; + return bpf_net_ctx; +} + +static inline void bpf_net_ctx_clear(struct bpf_net_context *bpf_net_ctx) +{ + if (bpf_net_ctx) + current->bpf_net_context = NULL; +} + +static inline struct bpf_net_context *bpf_net_ctx_get(void) +{ + return current->bpf_net_context; +} + +static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) { + memset(&bpf_net_ctx->ri, 0, offsetof(struct bpf_net_context, ri.nh)); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_RI_INIT; + } + + return &bpf_net_ctx->ri; +} /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, @@ -1018,25 +1056,23 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); -void bpf_clear_redirect_map(struct bpf_map *map); - static inline bool xdp_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_set_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_clear_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; } @@ -1592,7 +1628,7 @@ static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 inde u64 flags, const u64 flag_mask, void *lookup_elem(struct bpf_map *map, u32 key)) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX; /* Lower bits of the flags are used as return code on lookup failure */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5187486c2522..5ff5e65a4627 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -54,6 +54,7 @@ struct bio_list; struct blk_plug; struct bpf_local_storage; struct bpf_run_ctx; +struct bpf_net_context; struct capture_control; struct cfs_rq; struct fs_struct; @@ -1509,6 +1510,8 @@ struct task_struct { /* Used for BPF run context */ struct bpf_run_ctx *bpf_ctx; #endif + /* Used by BPF for per-TASK xdp storage */ + struct bpf_net_context *bpf_net_context; #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index a8e34416e960..66974bd02710 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -240,12 +240,14 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, int xdp_n, struct xdp_cpumap_stats *stats, struct list_head *list) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int nframes; if (!rcpu->prog) return xdp_n; rcu_read_lock_bh(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats); @@ -255,6 +257,7 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames, if (unlikely(!list_empty(list))) cpu_map_bpf_prog_run_skb(rcpu, list, stats); + bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock_bh(); /* resched point, may call do_softirq() */ return nframes; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 7f3b34452243..fbfdfb60db8d 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -196,7 +196,14 @@ static void dev_map_free(struct bpf_map *map) list_del_rcu(&dtab->list); spin_unlock(&dev_map_lock); - bpf_clear_redirect_map(map); + /* bpf_redirect_info->map is assigned in __bpf_xdp_redirect_map() + * during NAPI callback and cleared after the XDP redirect. There is no + * explicit RCU read section which protects bpf_redirect_info->map but + * local_bh_disable() also marks the beginning an RCU section. This + * makes the complete softirq callback RCU protected. Thus after + * following synchronize_rcu() there no bpf_redirect_info->map == map + * assignment. + */ synchronize_rcu(); /* Make sure prior __dev_map_entry_free() have completed. */ diff --git a/kernel/fork.c b/kernel/fork.c index 99076dbe27d8..f314bdd7e610 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2355,6 +2355,7 @@ __latent_entropy struct task_struct *copy_process( RCU_INIT_POINTER(p->bpf_storage, NULL); p->bpf_ctx = NULL; #endif + p->bpf_net_context = NULL; /* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 36ae54f57bf5..a6d7f790cdda 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -283,9 +283,10 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes, static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, u32 repeat) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int err = 0, act, ret, i, nframes = 0, batch_sz; struct xdp_frame **frames = xdp->frames; + struct bpf_redirect_info *ri; struct xdp_page_head *head; struct xdp_frame *frm; bool redirect = false; @@ -295,6 +296,8 @@ static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, batch_sz = min_t(u32, repeat, xdp->batch_size); local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + ri = bpf_net_ctx_get_ri(); xdp_set_return_frame_no_direct(); for (i = 0; i < batch_sz; i++) { @@ -359,6 +362,7 @@ out: } xdp_clear_return_frame_no_direct(); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); return err; } @@ -394,6 +398,7 @@ static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct bpf_prog_array_item item = {.prog = prog}; struct bpf_run_ctx *old_ctx; struct bpf_cg_run_ctx run_ctx; @@ -419,10 +424,14 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, do { run_ctx.prog_item = &item; local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = bpf_prog_run(prog, ctx); + + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time)); bpf_reset_run_ctx(old_ctx); diff --git a/net/core/dev.c b/net/core/dev.c index 8ef727c2ae2b..b94fb4e63a28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4045,10 +4045,13 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int sch_ret; if (!entry) return skb; + + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; @@ -4077,10 +4080,12 @@ ingress_verdict: break; } *ret = NET_RX_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; case TC_ACT_SHOT: kfree_skb_reason(skb, drop_reason); *ret = NET_RX_DROP; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; /* used by tc_run */ case TC_ACT_STOLEN: @@ -4090,8 +4095,10 @@ ingress_verdict: fallthrough; case TC_ACT_CONSUMED: *ret = NET_RX_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; } + bpf_net_ctx_clear(bpf_net_ctx); return skb; } @@ -4101,11 +4108,14 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int sch_ret; if (!entry) return skb; + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was * already set by the caller. */ @@ -4121,10 +4131,12 @@ egress_verdict: /* No need to push/pop skb's mac_header here on egress! */ skb_do_redirect(skb); *ret = NET_XMIT_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; case TC_ACT_SHOT: kfree_skb_reason(skb, drop_reason); *ret = NET_XMIT_DROP; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; /* used by tc_run */ case TC_ACT_STOLEN: @@ -4134,8 +4146,10 @@ egress_verdict: fallthrough; case TC_ACT_CONSUMED: *ret = NET_XMIT_SUCCESS; + bpf_net_ctx_clear(bpf_net_ctx); return NULL; } + bpf_net_ctx_clear(bpf_net_ctx); return skb; } @@ -6325,6 +6339,7 @@ enum { static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, unsigned flags, u16 budget) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; bool skip_schedule = false; unsigned long timeout; int rc; @@ -6342,6 +6357,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state); local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (flags & NAPI_F_PREFER_BUSY_POLL) { napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); @@ -6364,6 +6380,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, netpoll_poll_unlock(have_poll_lock); if (rc == budget) __busy_poll_stop(napi, skip_schedule); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); } @@ -6373,6 +6390,7 @@ static void __napi_busy_loop(unsigned int napi_id, { unsigned long start_time = loop_end ? busy_loop_current_time() : 0; int (*napi_poll)(struct napi_struct *napi, int budget); + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; void *have_poll_lock = NULL; struct napi_struct *napi; @@ -6391,6 +6409,7 @@ restart: int work = 0; local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); if (!napi_poll) { unsigned long val = READ_ONCE(napi->state); @@ -6421,6 +6440,7 @@ count: __NET_ADD_STATS(dev_net(napi->dev), LINUX_MIB_BUSYPOLLRXPACKETS, work); skb_defer_free_flush(this_cpu_ptr(&softnet_data)); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); if (!loop_end || loop_end(loop_end_arg, start_time)) @@ -6848,6 +6868,7 @@ static int napi_thread_wait(struct napi_struct *napi) static void napi_threaded_poll_loop(struct napi_struct *napi) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; struct softnet_data *sd; unsigned long last_qs = jiffies; @@ -6856,6 +6877,8 @@ static void napi_threaded_poll_loop(struct napi_struct *napi) void *have; local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); + sd = this_cpu_ptr(&softnet_data); sd->in_napi_threaded_poll = true; @@ -6871,6 +6894,7 @@ static void napi_threaded_poll_loop(struct napi_struct *napi) net_rps_action_and_irq_enable(sd); } skb_defer_free_flush(sd); + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); if (!repoll) @@ -6896,10 +6920,12 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + usecs_to_jiffies(READ_ONCE(net_hotdata.netdev_budget_usecs)); + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int budget = READ_ONCE(net_hotdata.netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); start: sd->in_net_rx_action = true; local_irq_disable(); @@ -6952,7 +6978,8 @@ start: sd->in_net_rx_action = false; net_rps_action_and_irq_enable(sd); -end:; +end: + bpf_net_ctx_clear(bpf_net_ctx); } struct netdev_adjacent { diff --git a/net/core/filter.c b/net/core/filter.c index adb887195c3a..eb1c4425c06f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2483,9 +2483,6 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; -DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); -EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); - static struct net_device *skb_get_peer_dev(struct net_device *dev) { const struct net_device_ops *ops = dev->netdev_ops; @@ -2498,7 +2495,7 @@ static struct net_device *skb_get_peer_dev(struct net_device *dev) int skb_do_redirect(struct sk_buff *skb) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct net *net = dev_net(skb->dev); struct net_device *dev; u32 flags = ri->flags; @@ -2531,7 +2528,7 @@ out_drop: BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL))) return TC_ACT_SHOT; @@ -2552,7 +2549,7 @@ static const struct bpf_func_proto bpf_redirect_proto = { BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely(flags)) return TC_ACT_SHOT; @@ -2574,7 +2571,7 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = { BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params, int, plen, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely((plen && plen < sizeof(*params)) || flags)) return TC_ACT_SHOT; @@ -4300,30 +4297,13 @@ void xdp_do_check_flushed(struct napi_struct *napi) } #endif -void bpf_clear_redirect_map(struct bpf_map *map) -{ - struct bpf_redirect_info *ri; - int cpu; - - for_each_possible_cpu(cpu) { - ri = per_cpu_ptr(&bpf_redirect_info, cpu); - /* Avoid polluting remote cacheline due to writes if - * not needed. Once we pass this test, we need the - * cmpxchg() to make sure it hasn't been changed in - * the meantime by remote CPU. - */ - if (unlikely(READ_ONCE(ri->map) == map)) - cmpxchg(&ri->map, map, NULL); - } -} - DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key); u32 xdp_master_redirect(struct xdp_buff *xdp) { + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct net_device *master, *slave; - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); @@ -4395,7 +4375,7 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, map = READ_ONCE(ri->map); /* The map pointer is cleared when the map is being torn - * down by bpf_clear_redirect_map() + * down by dev_map_free() */ if (unlikely(!map)) { err = -ENOENT; @@ -4440,7 +4420,7 @@ err: int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; if (map_type == BPF_MAP_TYPE_XSKMAP) @@ -4454,7 +4434,7 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; if (map_type == BPF_MAP_TYPE_XSKMAP) @@ -4471,7 +4451,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, enum bpf_map_type map_type, u32 map_id, u32 flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct bpf_map *map; int err; @@ -4483,7 +4463,7 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, map = READ_ONCE(ri->map); /* The map pointer is cleared when the map is being torn - * down by bpf_clear_redirect_map() + * down by dev_map_free() */ if (unlikely(!map)) { err = -ENOENT; @@ -4525,7 +4505,7 @@ err: int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; @@ -4561,7 +4541,7 @@ err: BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); if (unlikely(flags)) return XDP_ABORTED; diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index a94943681e5a..afb05f58b64c 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -38,12 +38,14 @@ static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt) static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, struct dst_entry *dst, bool can_redirect) { + struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; int ret; /* Disabling BH is needed to protect per-CPU bpf_redirect_info between * BPF prog and skb_do_redirect(). */ local_bh_disable(); + bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -76,6 +78,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, break; } + bpf_net_ctx_clear(bpf_net_ctx); local_bh_enable(); return ret; -- cgit v1.2.3 From 3f9fe37d9e16a6cfd5f4d1f536686ea71db3196f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:05 +0200 Subject: net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-CPU flush lists, which are accessed from within the NAPI callback (xdp_do_flush() for instance), are per-CPU. There are subject to the same problem as struct bpf_redirect_info. Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and xskmap_map_flush_list to struct bpf_net_context. Add wrappers for the access. The lists initialized on first usage (similar to bpf_net_ctx_get_ri()). Cc: "Björn Töpel" Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: Jonathan Lemon Cc: KP Singh Cc: Maciej Fijalkowski Cc: Magnus Karlsson Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-16-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/cpumap.c | 19 +++---------------- kernel/bpf/devmap.c | 11 +++-------- net/xdp/xsk.c | 12 ++++-------- 4 files changed, 52 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0a7f6e4a00b6..c0349522de8f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,6 +736,9 @@ struct bpf_nh_params { /* flags for bpf_redirect_info kern_flags */ #define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ #define BPF_RI_F_RI_INIT BIT(1) +#define BPF_RI_F_CPU_MAP_INIT BIT(2) +#define BPF_RI_F_DEV_MAP_INIT BIT(3) +#define BPF_RI_F_XSK_MAP_INIT BIT(4) struct bpf_redirect_info { u64 tgt_index; @@ -750,6 +753,9 @@ struct bpf_redirect_info { struct bpf_net_context { struct bpf_redirect_info ri; + struct list_head cpu_map_flush_list; + struct list_head dev_map_flush_list; + struct list_head xskmap_map_flush_list; }; static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx) @@ -787,6 +793,42 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void) return &bpf_net_ctx->ri; } +static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_CPU_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_CPU_MAP_INIT; + } + + return &bpf_net_ctx->cpu_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_DEV_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_DEV_MAP_INIT; + } + + return &bpf_net_ctx->dev_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_XSK_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_XSK_MAP_INIT; + } + + return &bpf_net_ctx->xskmap_map_flush_list; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 66974bd02710..068e994ed781 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -79,8 +79,6 @@ struct bpf_cpu_map { struct bpf_cpu_map_entry __rcu **cpu_map; }; -static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list); - static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; @@ -709,7 +707,7 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) */ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) @@ -761,7 +759,7 @@ trace: void __cpu_map_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -775,20 +773,9 @@ void __cpu_map_flush(void) #ifdef CONFIG_DEBUG_NET bool cpu_map_check_flush(void) { - if (list_empty(this_cpu_ptr(&cpu_map_flush_list))) + if (list_empty(bpf_net_ctx_get_cpu_map_flush_list())) return false; __cpu_map_flush(); return true; } #endif - -static int __init cpu_map_init(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu)); - return 0; -} - -subsys_initcall(cpu_map_init); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index fbfdfb60db8d..317ac2d66ebd 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -83,7 +83,6 @@ struct bpf_dtab { u32 n_buckets; }; -static DEFINE_PER_CPU(struct list_head, dev_flush_list); static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); @@ -415,7 +414,7 @@ out: */ void __dev_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -429,7 +428,7 @@ void __dev_flush(void) #ifdef CONFIG_DEBUG_NET bool dev_check_flush(void) { - if (list_empty(this_cpu_ptr(&dev_flush_list))) + if (list_empty(bpf_net_ctx_get_dev_flush_list())) return false; __dev_flush(); return true; @@ -460,7 +459,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key) static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) @@ -1160,15 +1159,11 @@ static struct notifier_block dev_map_notifier = { static int __init dev_map_init(void) { - int cpu; - /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */ BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) != offsetof(struct _bpf_dtab_netdev, dev)); register_netdevice_notifier(&dev_map_notifier); - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu)); return 0; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7d1c0986f9bb..ed062e038389 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -35,8 +35,6 @@ #define TX_BATCH_SIZE 32 #define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) -static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); - void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { if (pool->cached_need_wakeup & XDP_WAKEUP_RX) @@ -372,7 +370,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) { - struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); int err; err = xsk_rcv(xs, xdp); @@ -387,7 +385,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) void __xsk_map_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); struct xdp_sock *xs, *tmp; list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { @@ -399,7 +397,7 @@ void __xsk_map_flush(void) #ifdef CONFIG_DEBUG_NET bool xsk_map_check_flush(void) { - if (list_empty(this_cpu_ptr(&xskmap_flush_list))) + if (list_empty(bpf_net_ctx_get_xskmap_flush_list())) return false; __xsk_map_flush(); return true; @@ -1772,7 +1770,7 @@ static struct pernet_operations xsk_net_ops = { static int __init xsk_init(void) { - int err, cpu; + int err; err = proto_register(&xsk_proto, 0 /* no slab */); if (err) @@ -1790,8 +1788,6 @@ static int __init xsk_init(void) if (err) goto out_pernet; - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu)); return 0; out_pernet: -- cgit v1.2.3 From 07b87f9eea0c30675084d50c82532d20168da009 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 20 Jun 2024 08:47:24 +0200 Subject: xfrm: Fix unregister netdevice hang on hardware offload. When offloading xfrm states to hardware, the offloading device is attached to the skbs secpath. If a skb is free is deferred, an unregister netdevice hangs because the netdevice is still refcounted. Fix this by removing the netdevice from the xfrm states when the netdevice is unregistered. To find all xfrm states that need to be cleared we add another list where skbs linked to that are unlinked from the lists (deleted) but not yet freed. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 36 +++++++++--------------------- net/xfrm/xfrm_state.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77ebf5bcf0b9..7d4c2235252c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -178,7 +178,10 @@ struct xfrm_state { struct hlist_node gclist; struct hlist_node bydst; }; - struct hlist_node bysrc; + union { + struct hlist_node dev_gclist; + struct hlist_node bysrc; + }; struct hlist_node byspi; struct hlist_node byseq; @@ -1588,7 +1591,7 @@ void xfrm_state_update_stats(struct net *net); static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { struct xfrm_dev_offload *xdo = &x->xso; - struct net_device *dev = xdo->dev; + struct net_device *dev = READ_ONCE(xdo->dev); if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_update_stats) @@ -1946,13 +1949,16 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +void xfrm_dev_state_delete(struct xfrm_state *x); +void xfrm_dev_state_free(struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) { struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = READ_ONCE(xso->dev); - if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) - xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); + if (dev && dev->xfrmdev_ops->xdo_dev_state_advance_esn) + dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); } static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) @@ -1973,28 +1979,6 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) return false; } -static inline void xfrm_dev_state_delete(struct xfrm_state *x) -{ - struct xfrm_dev_offload *xso = &x->xso; - - if (xso->dev) - xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); -} - -static inline void xfrm_dev_state_free(struct xfrm_state *x) -{ - struct xfrm_dev_offload *xso = &x->xso; - struct net_device *dev = xso->dev; - - if (dev && dev->xfrmdev_ops) { - if (dev->xfrmdev_ops->xdo_dev_state_free) - dev->xfrmdev_ops->xdo_dev_state_free(x); - xso->dev = NULL; - xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; - netdev_put(dev, &xso->dev_tracker); - } -} - static inline void xfrm_dev_policy_delete(struct xfrm_policy *x) { struct xfrm_dev_offload *xdo = &x->xdo; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 649bb739df0d..d531d2a1fae2 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -49,6 +49,7 @@ static struct kmem_cache *xfrm_state_cache __ro_after_init; static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); static HLIST_HEAD(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_dev_gc_list); static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) { @@ -214,6 +215,7 @@ static DEFINE_SPINLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO]; static DEFINE_SPINLOCK(xfrm_state_gc_lock); +static DEFINE_SPINLOCK(xfrm_state_dev_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -683,6 +685,40 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) } EXPORT_SYMBOL(xfrm_state_alloc); +#ifdef CONFIG_XFRM_OFFLOAD +void xfrm_dev_state_delete(struct xfrm_state *x) +{ + struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = READ_ONCE(xso->dev); + + if (dev) { + dev->xfrmdev_ops->xdo_dev_state_delete(x); + spin_lock_bh(&xfrm_state_dev_gc_lock); + hlist_add_head(&x->dev_gclist, &xfrm_state_dev_gc_list); + spin_unlock_bh(&xfrm_state_dev_gc_lock); + } +} + +void xfrm_dev_state_free(struct xfrm_state *x) +{ + struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = READ_ONCE(xso->dev); + + if (dev && dev->xfrmdev_ops) { + spin_lock_bh(&xfrm_state_dev_gc_lock); + if (!hlist_unhashed(&x->dev_gclist)) + hlist_del(&x->dev_gclist); + spin_unlock_bh(&xfrm_state_dev_gc_lock); + + if (dev->xfrmdev_ops->xdo_dev_state_free) + dev->xfrmdev_ops->xdo_dev_state_free(x); + WRITE_ONCE(xso->dev, NULL); + xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; + netdev_put(dev, &xso->dev_tracker); + } +} +#endif + void __xfrm_state_destroy(struct xfrm_state *x, bool sync) { WARN_ON(x->km.state != XFRM_STATE_DEAD); @@ -848,6 +884,9 @@ EXPORT_SYMBOL(xfrm_state_flush); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid) { + struct xfrm_state *x; + struct hlist_node *tmp; + struct xfrm_dev_offload *xso; int i, err = 0, cnt = 0; spin_lock_bh(&net->xfrm.xfrm_state_lock); @@ -857,8 +896,6 @@ int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_vali err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { - struct xfrm_state *x; - struct xfrm_dev_offload *xso; restart: hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { xso = &x->xso; @@ -868,6 +905,8 @@ restart: spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = xfrm_state_delete(x); + xfrm_dev_state_free(x); + xfrm_audit_state_delete(x, err ? 0 : 1, task_valid); xfrm_state_put(x); @@ -884,6 +923,24 @@ restart: out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + spin_lock_bh(&xfrm_state_dev_gc_lock); +restart_gc: + hlist_for_each_entry_safe(x, tmp, &xfrm_state_dev_gc_list, dev_gclist) { + xso = &x->xso; + + if (xso->dev == dev) { + spin_unlock_bh(&xfrm_state_dev_gc_lock); + xfrm_dev_state_free(x); + spin_lock_bh(&xfrm_state_dev_gc_lock); + goto restart_gc; + } + + } + spin_unlock_bh(&xfrm_state_dev_gc_lock); + + xfrm_flush_gc(); + return err; } EXPORT_SYMBOL(xfrm_dev_state_flush); -- cgit v1.2.3 From 98f706de445b464f25220360210a4bcb9cc6c41a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:16 -0700 Subject: af_unix: Define locking order for U_LOCK_SECOND in unix_stream_connect(). While a SOCK_(STREAM|SEQPACKET) socket connect()s to another, we hold two locks of them by unix_state_lock() and unix_state_lock_nested() in unix_stream_connect(). Before unix_state_lock_nested(), the following is guaranteed by checking sk->sk_state: 1. The first socket is TCP_LISTEN 2. The second socket is not the first one 3. Simultaneous connect() must fail So, the client state can be TCP_CLOSE or TCP_LISTEN or TCP_ESTABLISHED. Let's define the expected states as unix_state_lock_cmp_fn() instead of using unix_state_lock_nested(). Note that 2. is detected by debug_spin_lock_before() and 3. cannot be expressed as lock_cmp_fn. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 1 - net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b6eedf7650da..fd813ad73ab8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -98,7 +98,6 @@ struct unix_sock { #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) enum unix_socket_lock_class { U_LOCK_NORMAL, - U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc * candidates to close a small race window. diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 88f2c5d039c4..a092d6999ae0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -143,6 +143,41 @@ static int unix_state_lock_cmp_fn(const struct lockdep_map *_a, a = container_of(_a, struct unix_sock, lock.dep_map); b = container_of(_b, struct unix_sock, lock.dep_map); + if (a->sk.sk_state == TCP_LISTEN) { + /* unix_stream_connect(): Before the 2nd unix_state_lock(), + * + * 1. a is TCP_LISTEN. + * 2. b is not a. + * 3. concurrent connect(b -> a) must fail. + * + * Except for 2. & 3., the b's state can be any possible + * value due to concurrent connect() or listen(). + * + * 2. is detected in debug_spin_lock_before(), and 3. cannot + * be expressed as lock_cmp_fn. + */ + switch (b->sk.sk_state) { + case TCP_CLOSE: + case TCP_ESTABLISHED: + case TCP_LISTEN: + return -1; + default: + /* Invalid case. */ + return 0; + } + } + + /* Should never happen. Just to be symmetric. */ + if (b->sk.sk_state == TCP_LISTEN) { + switch (b->sk.sk_state) { + case TCP_CLOSE: + case TCP_ESTABLISHED: + return 1; + default: + return 0; + } + } + /* unix_state_double_lock(): ascending address order. */ return cmp_ptr(a, b); } @@ -1585,7 +1620,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk, U_LOCK_SECOND); + unix_state_lock(sk); if (unlikely(sk->sk_state != TCP_CLOSE)) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL; -- cgit v1.2.3 From c4da4661d985fd3cbaea3ea6101e2dd0d2ad4b74 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:18 -0700 Subject: af_unix: Remove U_LOCK_DIAG. sk_diag_dump_icons() acquires embryo's lock by unix_state_lock_nested() to fetch its peer. The embryo's ->peer is set to NULL only when its parent listener is close()d. Then, unix_release_sock() is called for each embryo after unlinking skb by skb_dequeue(). In sk_diag_dump_icons(), we hold the parent's recvq lock, so we need not acquire unix_state_lock_nested(), and peer is always non-NULL. Let's remove unnecessary unix_state_lock_nested() and non-NULL test for peer. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 1 - net/unix/diag.c | 17 +++-------------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd813ad73ab8..c42645199cee 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -98,7 +98,6 @@ struct unix_sock { #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) enum unix_socket_lock_class { U_LOCK_NORMAL, - U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc * candidates to close a small race window. */ diff --git a/net/unix/diag.c b/net/unix/diag.c index d2d66727b0da..9138af8b465e 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -73,20 +73,9 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) buf = nla_data(attr); i = 0; - skb_queue_walk(&sk->sk_receive_queue, skb) { - struct sock *req, *peer; - - req = skb->sk; - /* - * The state lock is outer for the same sk's - * queue lock. With the other's queue locked it's - * OK to lock the state. - */ - unix_state_lock_nested(req, U_LOCK_DIAG); - peer = unix_sk(req)->peer; - buf[i++] = (peer ? sock_i_ino(peer) : 0); - unix_state_unlock(req); - } + skb_queue_walk(&sk->sk_receive_queue, skb) + buf[i++] = sock_i_ino(unix_peer(skb->sk)); + spin_unlock(&sk->sk_receive_queue.lock); } -- cgit v1.2.3 From 7202cb591624b860bd7b688d4bd0f5bee79c7f44 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:19 -0700 Subject: af_unix: Remove U_LOCK_GC_LISTENER. Commit 1971d13ffa84 ("af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc().") added U_LOCK_GC_LISTENER for the old GC, but it's no longer needed for the new GC. Let's remove U_LOCK_GC_LISTENER and unix_state_lock_nested() as there's no user. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index c42645199cee..63129c79b8cb 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -96,18 +96,6 @@ struct unix_sock { #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -enum unix_socket_lock_class { - U_LOCK_NORMAL, - U_LOCK_GC_LISTENER, /* used for listening socket while determining gc - * candidates to close a small race window. - */ -}; - -static inline void unix_state_lock_nested(struct sock *sk, - enum unix_socket_lock_class subclass) -{ - spin_lock_nested(&unix_sk(sk)->lock, subclass); -} #define peer_wait peer_wq.wait -- cgit v1.2.3 From 605efd54b50437ed9f3915690539d0afddca9d95 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:41 +0200 Subject: netfilter: nf_tables: make struct nft_trans first member of derived subtypes There is 'struct nft_trans', the basic structure for all transactional objects, and the the various different transactional objects, such as nft_trans_table, chain, set, set_elem and so on. Right now 'struct nft_trans' uses a flexible member at the tail (data[]), and casting is needed to access the actual type-specific members. Change this to make the hierarchy visible in source code, i.e. make struct nft_trans the first member of all derived subtypes. This has several advantages: 1. pahole output reflects the real size needed by the particular subtype 2. allows to use container_of() to convert the base type to the actual object type instead of casting ->data to the overlay structure. 3. It makes it easy to add intermediate types. 'struct nft_trans' contains a 'binding_list' that is only needed by two subtypes, so it should be part of the two subtypes, not in the base structure. But that makes it hard to interate over the binding_list, because there is no common base structure. A follow patch moves the bind list to a new struct: struct nft_trans_binding { struct nft_trans nft_trans; struct list_head binding_list; }; ... and makes that structure the new 'first member' for both nft_trans_chain and nft_trans_set. No functional change intended in this patch. Some numbers: struct nft_trans { /* size: 88, cachelines: 2, members: 5 */ struct nft_trans_chain { /* size: 152, cachelines: 3, members: 10 */ struct nft_trans_elem { /* size: 112, cachelines: 2, members: 4 */ struct nft_trans_flowtable { /* size: 128, cachelines: 2, members: 5 */ struct nft_trans_obj { /* size: 112, cachelines: 2, members: 4 */ struct nft_trans_rule { /* size: 112, cachelines: 2, members: 5 */ struct nft_trans_set { /* size: 120, cachelines: 2, members: 8 */ struct nft_trans_table { /* size: 96, cachelines: 2, members: 2 */ Of particular interest is nft_trans_elem, which needs to be allocated once for each pending (to be added or removed) set element. Add BUILD_BUG_ON to check struct nft_trans is placed at the top of the container structure. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 162 ++++++++++++++++++++++---------------- net/netfilter/nf_tables_api.c | 18 +++-- 2 files changed, 105 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2796153b03da..b25df037fceb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1608,14 +1608,16 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) } /** - * struct nft_trans - nf_tables object update in transaction + * struct nft_trans - nf_tables object update in transaction * - * @list: used internally - * @binding_list: list of objects with possible bindings - * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context - * @data: internal information related to the transaction + * @list: used internally + * @binding_list: list of objects with possible bindings + * @msg_type: message type + * @put_net: ctx->net needs to be put + * @ctx: transaction context + * + * This is the information common to all objects in the transaction, + * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; @@ -1623,26 +1625,29 @@ struct nft_trans { int msg_type; bool put_net; struct nft_ctx ctx; - char data[]; }; struct nft_trans_rule { + struct nft_trans nft_trans; struct nft_rule *rule; struct nft_flow_rule *flow; u32 rule_id; bool bound; }; -#define nft_trans_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->rule) -#define nft_trans_flow_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->flow) -#define nft_trans_rule_id(trans) \ - (((struct nft_trans_rule *)trans->data)->rule_id) -#define nft_trans_rule_bound(trans) \ - (((struct nft_trans_rule *)trans->data)->bound) +#define nft_trans_container_rule(trans) \ + container_of(trans, struct nft_trans_rule, nft_trans) +#define nft_trans_rule(trans) \ + nft_trans_container_rule(trans)->rule +#define nft_trans_flow_rule(trans) \ + nft_trans_container_rule(trans)->flow +#define nft_trans_rule_id(trans) \ + nft_trans_container_rule(trans)->rule_id +#define nft_trans_rule_bound(trans) \ + nft_trans_container_rule(trans)->bound struct nft_trans_set { + struct nft_trans nft_trans; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1652,22 +1657,25 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_set(trans) \ - (((struct nft_trans_set *)trans->data)->set) -#define nft_trans_set_id(trans) \ - (((struct nft_trans_set *)trans->data)->set_id) -#define nft_trans_set_bound(trans) \ - (((struct nft_trans_set *)trans->data)->bound) -#define nft_trans_set_update(trans) \ - (((struct nft_trans_set *)trans->data)->update) -#define nft_trans_set_timeout(trans) \ - (((struct nft_trans_set *)trans->data)->timeout) -#define nft_trans_set_gc_int(trans) \ - (((struct nft_trans_set *)trans->data)->gc_int) -#define nft_trans_set_size(trans) \ - (((struct nft_trans_set *)trans->data)->size) +#define nft_trans_container_set(trans) \ + container_of(trans, struct nft_trans_set, nft_trans) +#define nft_trans_set(trans) \ + nft_trans_container_set(trans)->set +#define nft_trans_set_id(trans) \ + nft_trans_container_set(trans)->set_id +#define nft_trans_set_bound(trans) \ + nft_trans_container_set(trans)->bound +#define nft_trans_set_update(trans) \ + nft_trans_container_set(trans)->update +#define nft_trans_set_timeout(trans) \ + nft_trans_container_set(trans)->timeout +#define nft_trans_set_gc_int(trans) \ + nft_trans_container_set(trans)->gc_int +#define nft_trans_set_size(trans) \ + nft_trans_container_set(trans)->size struct nft_trans_chain { + struct nft_trans nft_trans; struct nft_chain *chain; bool update; char *name; @@ -1679,73 +1687,87 @@ struct nft_trans_chain { struct list_head hook_list; }; -#define nft_trans_chain(trans) \ - (((struct nft_trans_chain *)trans->data)->chain) -#define nft_trans_chain_update(trans) \ - (((struct nft_trans_chain *)trans->data)->update) -#define nft_trans_chain_name(trans) \ - (((struct nft_trans_chain *)trans->data)->name) -#define nft_trans_chain_stats(trans) \ - (((struct nft_trans_chain *)trans->data)->stats) -#define nft_trans_chain_policy(trans) \ - (((struct nft_trans_chain *)trans->data)->policy) -#define nft_trans_chain_bound(trans) \ - (((struct nft_trans_chain *)trans->data)->bound) -#define nft_trans_chain_id(trans) \ - (((struct nft_trans_chain *)trans->data)->chain_id) -#define nft_trans_basechain(trans) \ - (((struct nft_trans_chain *)trans->data)->basechain) -#define nft_trans_chain_hooks(trans) \ - (((struct nft_trans_chain *)trans->data)->hook_list) +#define nft_trans_container_chain(trans) \ + container_of(trans, struct nft_trans_chain, nft_trans) +#define nft_trans_chain(trans) \ + nft_trans_container_chain(trans)->chain +#define nft_trans_chain_update(trans) \ + nft_trans_container_chain(trans)->update +#define nft_trans_chain_name(trans) \ + nft_trans_container_chain(trans)->name +#define nft_trans_chain_stats(trans) \ + nft_trans_container_chain(trans)->stats +#define nft_trans_chain_policy(trans) \ + nft_trans_container_chain(trans)->policy +#define nft_trans_chain_bound(trans) \ + nft_trans_container_chain(trans)->bound +#define nft_trans_chain_id(trans) \ + nft_trans_container_chain(trans)->chain_id +#define nft_trans_basechain(trans) \ + nft_trans_container_chain(trans)->basechain +#define nft_trans_chain_hooks(trans) \ + nft_trans_container_chain(trans)->hook_list struct nft_trans_table { + struct nft_trans nft_trans; bool update; }; -#define nft_trans_table_update(trans) \ - (((struct nft_trans_table *)trans->data)->update) +#define nft_trans_container_table(trans) \ + container_of(trans, struct nft_trans_table, nft_trans) +#define nft_trans_table_update(trans) \ + nft_trans_container_table(trans)->update struct nft_trans_elem { + struct nft_trans nft_trans; struct nft_set *set; struct nft_elem_priv *elem_priv; bool bound; }; -#define nft_trans_elem_set(trans) \ - (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem_priv(trans) \ - (((struct nft_trans_elem *)trans->data)->elem_priv) -#define nft_trans_elem_set_bound(trans) \ - (((struct nft_trans_elem *)trans->data)->bound) +#define nft_trans_container_elem(t) \ + container_of(t, struct nft_trans_elem, nft_trans) +#define nft_trans_elem_set(trans) \ + nft_trans_container_elem(trans)->set +#define nft_trans_elem_priv(trans) \ + nft_trans_container_elem(trans)->elem_priv +#define nft_trans_elem_set_bound(trans) \ + nft_trans_container_elem(trans)->bound struct nft_trans_obj { + struct nft_trans nft_trans; struct nft_object *obj; struct nft_object *newobj; bool update; }; -#define nft_trans_obj(trans) \ - (((struct nft_trans_obj *)trans->data)->obj) -#define nft_trans_obj_newobj(trans) \ - (((struct nft_trans_obj *)trans->data)->newobj) -#define nft_trans_obj_update(trans) \ - (((struct nft_trans_obj *)trans->data)->update) +#define nft_trans_container_obj(t) \ + container_of(t, struct nft_trans_obj, nft_trans) +#define nft_trans_obj(trans) \ + nft_trans_container_obj(trans)->obj +#define nft_trans_obj_newobj(trans) \ + nft_trans_container_obj(trans)->newobj +#define nft_trans_obj_update(trans) \ + nft_trans_container_obj(trans)->update struct nft_trans_flowtable { + struct nft_trans nft_trans; struct nft_flowtable *flowtable; bool update; struct list_head hook_list; u32 flags; }; -#define nft_trans_flowtable(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flowtable) -#define nft_trans_flowtable_update(trans) \ - (((struct nft_trans_flowtable *)trans->data)->update) -#define nft_trans_flowtable_hooks(trans) \ - (((struct nft_trans_flowtable *)trans->data)->hook_list) -#define nft_trans_flowtable_flags(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flags) +#define nft_trans_container_flowtable(t) \ + container_of(t, struct nft_trans_flowtable, nft_trans) +#define nft_trans_flowtable(trans) \ + nft_trans_container_flowtable(trans)->flowtable +#define nft_trans_flowtable_update(trans) \ + nft_trans_container_flowtable(trans)->update +#define nft_trans_flowtable_hooks(trans) \ + nft_trans_container_flowtable(trans)->hook_list +#define nft_trans_flowtable_flags(trans) \ + nft_trans_container_flowtable(trans)->flags #define NFT_TRANS_GC_BATCHCOUNT 256 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index be3b4c90d2ed..19edd1bcecef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -153,7 +153,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, gfp); + trans = kzalloc(size, gfp); if (trans == NULL) return NULL; @@ -10348,7 +10348,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_activate(net, te->set, te->elem_priv); nf_tables_setelem_notify(&trans->ctx, te->set, @@ -10363,7 +10363,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nf_tables_setelem_notify(&trans->ctx, te->set, te->elem_priv, @@ -10643,7 +10643,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_remove(net, te->set, te->elem_priv); if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); @@ -10656,7 +10656,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { nft_setelem_data_activate(net, te->set, te->elem_priv); @@ -11588,6 +11588,14 @@ static int __init nf_tables_module_init(void) { int err; + BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0); + err = register_pernet_subsys(&nf_tables_net_ops); if (err < 0) return err; -- cgit v1.2.3 From 17d8f3ad36a5fa5c93afab90ed03ba7ec748dd03 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jun 2024 20:53:16 +0200 Subject: netfilter: nf_tables: move bind list_head into relevant subtypes Only nft_trans_chain and nft_trans_set subtypes use the trans->binding_list member. Add a new common binding subtype and move the member there. This reduces size of all other subtypes by 16 bytes on 64bit platforms. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 26 +++++++++----- net/netfilter/nf_tables_api.c | 71 +++++++++++++++++++++++++++++++-------- 2 files changed, 75 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b25df037fceb..f72448095833 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1611,7 +1611,6 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally - * @binding_list: list of objects with possible bindings * @msg_type: message type * @put_net: ctx->net needs to be put * @ctx: transaction context @@ -1621,12 +1620,23 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) */ struct nft_trans { struct list_head list; - struct list_head binding_list; int msg_type; bool put_net; struct nft_ctx ctx; }; +/** + * struct nft_trans_binding - nf_tables object with binding support in transaction + * @nft_trans: base structure, MUST be first member + * @binding_list: list of objects with possible bindings + * + * This is the base type used by objects that can be bound to a chain. + */ +struct nft_trans_binding { + struct nft_trans nft_trans; + struct list_head binding_list; +}; + struct nft_trans_rule { struct nft_trans nft_trans; struct nft_rule *rule; @@ -1647,7 +1657,7 @@ struct nft_trans_rule { nft_trans_container_rule(trans)->bound struct nft_trans_set { - struct nft_trans nft_trans; + struct nft_trans_binding nft_trans_binding; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1657,8 +1667,8 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_container_set(trans) \ - container_of(trans, struct nft_trans_set, nft_trans) +#define nft_trans_container_set(t) \ + container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans) #define nft_trans_set(trans) \ nft_trans_container_set(trans)->set #define nft_trans_set_id(trans) \ @@ -1675,7 +1685,7 @@ struct nft_trans_set { nft_trans_container_set(trans)->size struct nft_trans_chain { - struct nft_trans nft_trans; + struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; bool update; char *name; @@ -1687,8 +1697,8 @@ struct nft_trans_chain { struct list_head hook_list; }; -#define nft_trans_container_chain(trans) \ - container_of(trans, struct nft_trans_chain, nft_trans) +#define nft_trans_container_chain(t) \ + container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans) #define nft_trans_chain(trans) \ nft_trans_container_chain(trans)->chain #define nft_trans_chain_update(trans) \ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 19edd1bcecef..c950938ef612 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -158,7 +158,6 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); - INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -171,10 +170,26 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } +static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + case NFT_MSG_NEWSET: + return container_of(trans, struct nft_trans_binding, nft_trans); + } + + return NULL; +} + static void nft_trans_list_del(struct nft_trans *trans) { + struct nft_trans_binding *trans_binding; + list_del(&trans->list); - list_del(&trans->binding_list); + + trans_binding = nft_trans_get_binding(trans); + if (trans_binding) + list_del(&trans_binding->binding_list); } static void nft_trans_destroy(struct nft_trans *trans) @@ -372,21 +387,26 @@ static void nf_tables_unregister_hook(struct net *net, static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_binding *binding; + + list_add_tail(&trans->list, &nft_net->commit_list); + + binding = nft_trans_get_binding(trans); + if (!binding) + return; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && nft_chain_binding(nft_trans_chain(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; } - - list_add_tail(&trans->list, &nft_net->commit_list); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -416,11 +436,27 @@ static int nft_deltable(struct nft_ctx *ctx) return err; } -static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +static struct nft_trans * +nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type) { + struct nft_trans_chain *trans_chain; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (!trans) + return NULL; + + trans_chain = nft_trans_container_chain(trans); + INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list); + + return trans; +} + +static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc_chain(ctx, msg_type); if (trans == NULL) return ERR_PTR(-ENOMEM); @@ -560,12 +596,16 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set, const struct nft_set_desc *desc) { + struct nft_trans_set *trans_set; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); if (trans == NULL) return -ENOMEM; + trans_set = nft_trans_container_set(trans); + INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); @@ -2698,8 +2738,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } err = -ENOMEM; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN); if (trans == NULL) goto err_trans; @@ -2915,8 +2954,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, list_move(&hook->list, &chain_del_list); } - trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); if (!trans) { err = -ENOMEM; goto err_chain_del_hook; @@ -10147,6 +10185,7 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); @@ -10161,7 +10200,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } - list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { + trans = &trans_binding->nft_trans; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && @@ -10179,6 +10219,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return -EINVAL; } break; + default: + WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type); + break; } } @@ -11589,9 +11632,9 @@ static int __init nf_tables_module_init(void) int err; BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); - BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); - BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0); -- cgit v1.2.3 From b3f4c216f7af37fa60e50d2ebb3ec9dd0f93886c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:43 +0200 Subject: netfilter: nf_tables: compact chain+ft transaction objects Cover holes to reduce both structures by 8 byte. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f72448095833..1f0607b671ac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1687,10 +1687,10 @@ struct nft_trans_set { struct nft_trans_chain { struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; - bool update; char *name; struct nft_stats __percpu *stats; u8 policy; + bool update; bool bound; u32 chain_id; struct nft_base_chain *basechain; @@ -1763,9 +1763,9 @@ struct nft_trans_obj { struct nft_trans_flowtable { struct nft_trans nft_trans; struct nft_flowtable *flowtable; - bool update; struct list_head hook_list; u32 flags; + bool update; }; #define nft_trans_container_flowtable(t) \ -- cgit v1.2.3 From 8965d42bcf54d42cbc72fe34a9d0ec3f8527debd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:45 +0200 Subject: netfilter: nf_tables: pass nft_chain to destroy function, not nft_ctx It would be better to not store nft_ctx inside nft_trans object, the netlink ctx strucutre is huge and most of its information is never needed in places that use trans->ctx. Avoid/reduce its usage if possible, no runtime behaviour change intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 17 ++++++++--------- net/netfilter/nft_immediate.c | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1f0607b671ac..328fdc140551 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1171,7 +1171,7 @@ static inline bool nft_chain_is_bound(struct nft_chain *chain) int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); -void nf_tables_chain_destroy(struct nft_ctx *ctx); +void nf_tables_chain_destroy(struct nft_chain *chain); struct nft_stats { u64 bytes; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 60c435774db8..bdc2d7f781ca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2118,9 +2118,9 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) kvfree(chain->blob_next); } -void nf_tables_chain_destroy(struct nft_ctx *ctx) +void nf_tables_chain_destroy(struct nft_chain *chain) { - struct nft_chain *chain = ctx->chain; + const struct nft_table *table = chain->table; struct nft_hook *hook, *next; if (WARN_ON(chain->use > 0)) @@ -2132,7 +2132,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, &basechain->hook_list, list) { list_del_rcu(&hook->list); @@ -2621,7 +2621,7 @@ err_chain_add: err_trans: nft_use_dec_restore(&table->use); err_destroy_chain: - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(chain); return err; } @@ -9532,7 +9532,7 @@ static void nft_commit_release(struct nft_trans *trans) if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: @@ -10524,7 +10524,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); @@ -11411,7 +11411,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) } nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(ctx->chain); return 0; } @@ -11486,10 +11486,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; nft_chain_del(chain); nft_use_dec(&table->use); - nf_tables_chain_destroy(&ctx); + nf_tables_chain_destroy(chain); } nf_tables_table_destroy(&ctx); } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 6475c7abc1fe..ac2422c215e5 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -221,7 +221,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, list_del(&rule->list); nf_tables_rule_destroy(&chain_ctx, rule); } - nf_tables_chain_destroy(&chain_ctx); + nf_tables_chain_destroy(chain); break; default: break; -- cgit v1.2.3 From 13f20bc9ec4f9f25935bf52337d3d1708787bd55 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jun 2024 20:57:03 +0200 Subject: netfilter: nf_tables: store chain pointer in rule transaction Currently the chain can be derived from trans->ctx.chain, but the ctx will go away soon. Thus add the chain pointer to nft_trans_rule structure itself. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ net/netfilter/nf_tables_api.c | 21 +++++++++++---------- net/netfilter/nf_tables_offload.c | 16 ++++++++-------- 3 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 328fdc140551..86e6bd63a205 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1640,6 +1640,7 @@ struct nft_trans_binding { struct nft_trans_rule { struct nft_trans nft_trans; struct nft_rule *rule; + struct nft_chain *chain; struct nft_flow_rule *flow; u32 rule_id; bool bound; @@ -1655,6 +1656,8 @@ struct nft_trans_rule { nft_trans_container_rule(trans)->rule_id #define nft_trans_rule_bound(trans) \ nft_trans_container_rule(trans)->bound +#define nft_trans_rule_chain(trans) \ + nft_trans_container_rule(trans)->chain struct nft_trans_set { struct nft_trans_binding nft_trans_binding; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index f4e39816104f..3e5980f0bf71 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -251,7 +251,7 @@ static void __nft_chain_trans_bind(const struct nft_ctx *ctx, nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: - if (trans->ctx.chain == chain) + if (nft_trans_rule_chain(trans) == chain) nft_trans_rule_bound(trans) = bind; break; } @@ -541,6 +541,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; + nft_trans_rule_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -4227,7 +4228,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE && - trans->ctx.chain == chain && + nft_trans_rule_chain(trans) == chain && id == nft_trans_rule_id(trans)) return nft_trans_rule(trans); } @@ -9684,7 +9685,7 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - struct nft_chain *chain = trans->ctx.chain; + struct nft_chain *chain = nft_trans_rule_chain(trans); kvfree(chain->blob_next); chain->blob_next = NULL; @@ -10250,7 +10251,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - chain = trans->ctx.chain; + chain = nft_trans_rule_chain(trans); ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { @@ -10346,7 +10347,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); @@ -10361,7 +10362,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_rule(trans), NFT_TRANS_COMMIT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: @@ -10645,20 +10646,20 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.chain->use); + nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nft_use_inc_restore(&trans->ctx.chain->use); + nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 8d892a0d2438..0619feb10abb 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -533,18 +533,18 @@ static void nft_flow_rule_offload_abort(struct net *net, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); @@ -586,7 +586,7 @@ int nft_flow_rule_offload_commit(struct net *net) FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; if (trans->ctx.flags & NLM_F_REPLACE || @@ -594,16 +594,16 @@ int nft_flow_rule_offload_commit(struct net *net) err = -EOPNOTSUPP; break; } - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; -- cgit v1.2.3 From e169285f8c56b8d5702475de0582dc83650c6cee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:51 +0200 Subject: netfilter: nf_tables: do not store nft_ctx in transaction objects nft_ctx is huge and most of the information stored within isn't used at all. Remove nft_ctx member from the base transaction structure and store only what is needed. After this change, relevant struct sizes are: struct nft_trans_chain { /* size: 120 (-32), cachelines: 2, members: 10 */ struct nft_trans_elem { /* size: 72 (-40), cachelines: 2, members: 4 */ struct nft_trans_flowtable { /* size: 80 (-48), cachelines: 2, members: 5 */ struct nft_trans_obj { /* size: 72 (-40), cachelines: 2, members: 4 */ struct nft_trans_rule { /* size: 80 (-32), cachelines: 2, members: 6 */ struct nft_trans_set { /* size: 96 (-24), cachelines: 2, members: 8 */ struct nft_trans_table { /* size: 56 (-40), cachelines: 1, members: 2 */ struct nft_trans_elem can now be allocated from kmalloc-96 instead of kmalloc-128 slab. A further reduction by 8 bytes would even allow for kmalloc-64. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 43 ++++++++++-- net/netfilter/nf_tables_api.c | 140 ++++++++++++++++++++++---------------- net/netfilter/nf_tables_offload.c | 8 +-- 3 files changed, 125 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 86e6bd63a205..1e8da1b882ac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1611,18 +1611,26 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally + * @net: struct net + * @table: struct nft_table the object resides in * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context + * @seq: netlink sequence number + * @flags: modifiers to new request + * @report: notify via unicast netlink message + * @put_net: net needs to be put * * This is the information common to all objects in the transaction, * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; + struct net *net; + struct nft_table *table; int msg_type; - bool put_net; - struct nft_ctx ctx; + u32 seq; + u16 flags; + u8 report:1; + u8 put_net:1; }; /** @@ -1794,6 +1802,33 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline void nft_ctx_update(struct nft_ctx *ctx, + const struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: + ctx->chain = nft_trans_rule_chain(trans); + break; + case NFT_MSG_NEWCHAIN: + case NFT_MSG_DELCHAIN: + case NFT_MSG_DESTROYCHAIN: + ctx->chain = nft_trans_chain(trans); + break; + default: + ctx->chain = NULL; + break; + } + + ctx->net = trans->net; + ctx->table = trans->table; + ctx->family = trans->table->family; + ctx->report = trans->report; + ctx->flags = trans->flags; + ctx->seq = trans->seq; +} + struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6958f922f95a..02d75aefaa8e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -159,7 +159,12 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, INIT_LIST_HEAD(&trans->list); trans->msg_type = msg_type; - trans->ctx = *ctx; + + trans->net = ctx->net; + trans->table = ctx->table; + trans->seq = ctx->seq; + trans->flags = ctx->flags; + trans->report = ctx->report; return trans; } @@ -1258,7 +1263,7 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx) return true; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.table == ctx->table && + if (trans->table == ctx->table && ((trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain_update(trans)) || (trans->msg_type == NFT_MSG_DELCHAIN && @@ -2100,7 +2105,7 @@ static void nft_chain_stats_replace(struct nft_trans_chain *trans) trans->stats = rcu_replace_pointer(chain->stats, trans->stats, - lockdep_commit_lock_is_held(t->ctx.net)); + lockdep_commit_lock_is_held(t->net)); if (!trans->stats) static_branch_inc(&nft_counters_enabled); @@ -2766,7 +2771,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = -EEXIST; list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && - tmp->ctx.table == table && + tmp->table == table && nft_trans_chain_update(tmp) && nft_trans_chain_name(tmp) && strcmp(name, nft_trans_chain_name(tmp)) == 0) { @@ -9472,7 +9477,7 @@ static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans) static void nft_chain_commit_update(struct nft_trans_chain *trans) { - struct nft_table *table = trans->nft_trans_binding.nft_trans.ctx.table; + struct nft_table *table = trans->nft_trans_binding.nft_trans.table; struct nft_base_chain *basechain; if (trans->name) { @@ -9501,7 +9506,8 @@ static void nft_chain_commit_update(struct nft_trans_chain *trans) } } -static void nft_obj_commit_update(struct nft_trans *trans) +static void nft_obj_commit_update(const struct nft_ctx *ctx, + struct nft_trans *trans) { struct nft_object *newobj; struct nft_object *obj; @@ -9513,15 +9519,21 @@ static void nft_obj_commit_update(struct nft_trans *trans) return; obj->ops->update(obj, newobj); - nft_obj_destroy(&trans->ctx, newobj); + nft_obj_destroy(ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) { + struct nft_ctx ctx = { + .net = trans->net, + }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nf_tables_table_destroy(trans->ctx.table); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: free_percpu(nft_trans_chain_stats(trans)); @@ -9536,21 +9548,21 @@ static void nft_commit_release(struct nft_trans *trans) break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - nf_tables_set_elem_destroy(&trans->ctx, + nf_tables_set_elem_destroy(&ctx, nft_trans_elem_set(trans), nft_trans_elem_priv(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: @@ -9562,7 +9574,7 @@ static void nft_commit_release(struct nft_trans *trans) } if (trans->put_net) - put_net(trans->ctx.net); + put_net(trans->net); kfree(trans); } @@ -10042,7 +10054,7 @@ static void nf_tables_commit_release(struct net *net) trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); - get_net(trans->ctx.net); + get_net(trans->net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; @@ -10186,6 +10198,7 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); + const struct nlmsghdr *nlh = nlmsg_hdr(skb); struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; @@ -10193,6 +10206,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + struct nft_ctx ctx; LIST_HEAD(adl); int err; @@ -10201,6 +10215,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } + nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL); + list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { trans = &trans_binding->nft_trans; switch (trans->msg_type) { @@ -10238,7 +10254,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* 1. Allocate space for next generation rules_gen_X[] */ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_table *table = trans->ctx.table; + struct nft_table *table = trans->table; int ret; ret = nf_tables_commit_audit_alloc(&adl, table); @@ -10282,7 +10298,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_table *table = trans->ctx.table; + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); nf_tables_commit_audit_collect(&adl, table, trans->msg_type); switch (trans->msg_type) { @@ -10299,18 +10317,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } else { nft_clear(net, table); } - nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); + nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: list_del_rcu(&table->list); - nf_tables_table_notify(&trans->ctx, trans->msg_type); + nf_tables_table_notify(&ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { nft_chain_commit_update(nft_trans_container_chain(trans)); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); @@ -10318,14 +10336,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } else { nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); nft_clear(net, nft_trans_chain(trans)); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, @@ -10334,16 +10352,15 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } else { nft_chain_del(nft_trans_chain(trans)); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL); - nf_tables_unregister_hook(trans->ctx.net, table, + nf_tables_unregister_hook(ctx.net, ctx.table, nft_trans_chain(trans)); } break; case NFT_MSG_NEWRULE: - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nft_clear(net, nft_trans_rule(trans)); + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); @@ -10353,11 +10370,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: list_del_rcu(&nft_trans_rule(trans)->list); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), trans->msg_type); - nft_rule_expr_deactivate(&trans->ctx, - nft_trans_rule(trans), + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) @@ -10381,7 +10396,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) !list_empty(&nft_trans_set(trans)->bindings)) nft_use_dec(&table->use); } - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; @@ -10389,14 +10404,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSET: nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: te = nft_trans_container_elem(trans); nft_setelem_activate(net, te->set, te->elem_priv); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, NFT_MSG_NEWSETELEM); if (te->set->ops->commit && @@ -10410,7 +10425,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSETELEM: te = nft_trans_container_elem(trans); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, trans->msg_type); nft_setelem_remove(net, te->set, te->elem_priv); @@ -10426,13 +10441,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_commit_update(trans); - nf_tables_obj_notify(&trans->ctx, + nft_obj_commit_update(&ctx, trans); + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); } else { nft_clear(net, nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); nft_trans_destroy(trans); @@ -10441,14 +10456,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_obj_del(nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), trans->msg_type); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_trans_flowtable(trans)->data.flags = nft_trans_flowtable_flags(trans); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), NFT_MSG_NEWFLOWTABLE); @@ -10456,7 +10471,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, NFT_MSG_NEWFLOWTABLE); @@ -10466,7 +10481,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), trans->msg_type); @@ -10474,7 +10489,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, trans->msg_type); @@ -10516,9 +10531,13 @@ static void nf_tables_module_autoload(struct net *net) static void nf_tables_abort_release(struct nft_trans *trans) { + struct nft_ctx ctx = { }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: - nf_tables_table_destroy(trans->ctx.table); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) @@ -10527,17 +10546,17 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem_priv(trans), true); break; case NFT_MSG_NEWOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) @@ -10569,6 +10588,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + struct nft_ctx ctx = { + .net = net, + }; int err = 0; if (action == NFNL_ABORT_VALIDATE && @@ -10577,7 +10599,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { - struct nft_table *table = trans->ctx.table; + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: @@ -10604,7 +10628,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nft_clear(trans->ctx.net, table); + nft_clear(trans->net, table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: @@ -10624,7 +10648,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } nft_use_dec_restore(&table->use); nft_chain_del(nft_trans_chain(trans)); - nf_tables_unregister_hook(trans->ctx.net, table, + nf_tables_unregister_hook(trans->net, table, nft_trans_chain(trans)); } break; @@ -10635,7 +10659,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) &nft_trans_basechain(trans)->hook_list); } else { nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, nft_trans_chain(trans)); + nft_clear(trans->net, nft_trans_chain(trans)); } nft_trans_destroy(trans); break; @@ -10646,7 +10670,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) } nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) @@ -10655,8 +10679,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); + nft_clear(trans->net, nft_trans_rule(trans)); + nft_rule_expr_activate(&ctx, nft_trans_rule(trans)); if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); @@ -10678,9 +10702,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, nft_trans_set(trans)); + nft_clear(trans->net, nft_trans_set(trans)); if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) - nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_map_activate(&ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; @@ -10720,7 +10744,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { nft_use_dec_restore(&table->use); @@ -10730,7 +10754,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_clear(trans->net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: @@ -10751,7 +10775,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) &nft_trans_flowtable(trans)->hook_list); } else { nft_use_inc_restore(&table->use); - nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + nft_clear(trans->net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); break; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 0619feb10abb..64675f1c7f29 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -513,7 +513,7 @@ static void nft_flow_rule_offload_abort(struct net *net, int err = 0; list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { @@ -564,7 +564,7 @@ int nft_flow_rule_offload_commit(struct net *net) u8 policy; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { @@ -589,8 +589,8 @@ int nft_flow_rule_offload_commit(struct net *net) if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - if (trans->ctx.flags & NLM_F_REPLACE || - !(trans->ctx.flags & NLM_F_APPEND)) { + if (trans->flags & NLM_F_REPLACE || + !(trans->flags & NLM_F_APPEND)) { err = -EOPNOTSUPP; break; } -- cgit v1.2.3 From e29630247be24c3987e2b048f8e152771b32d38b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 3 Jun 2024 20:16:59 +0200 Subject: netfilter: nf_tables: rise cap on SELinux secmark context secmark context is artificially limited 256 bytes, rise it to 4Kbytes. Fixes: fb961945457f ("netfilter: nf_tables: add SECMARK support") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aa4094ca2444..639894ed1b97 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1376,7 +1376,7 @@ enum nft_secmark_attributes { #define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) /* Max security context length */ -#define NFT_SECMARK_CTX_MAXLEN 256 +#define NFT_SECMARK_CTX_MAXLEN 4096 /** * enum nft_reject_types - nf_tables reject expression reject types -- cgit v1.2.3 From 0e942053e4dc42a760f48c1981f3239825430f15 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:49 +0800 Subject: linux/dim: move useful macros to .h file Useful macros will be used effectively elsewhere. These will be utilized in subsequent patches. Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-2-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 7 +++++++ lib/dim/net_dim.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index f343bc9aa2ec..43398f5eade2 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -10,6 +10,13 @@ #include #include +/* Number of DIM profiles and period mode. */ +#define NET_DIM_PARAMS_NUM_PROFILES 5 +#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 +#define NET_DIM_DEF_PROFILE_CQE 1 +#define NET_DIM_DEF_PROFILE_EQE 1 + /* * Number of events between DIM iterations. * Causes a moderation of the algorithm run. diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 4e32f7aaac86..67d5beb34dc3 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -11,12 +11,6 @@ * There are different set of profiles for RX/TX CQs. * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES */ -#define NET_DIM_PARAMS_NUM_PROFILES 5 -#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 -#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 -#define NET_DIM_DEF_PROFILE_CQE 1 -#define NET_DIM_DEF_PROFILE_EQE 1 - #define NET_DIM_RX_EQE_PROFILES { \ {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ -- cgit v1.2.3 From f750dfe825b904164688adeb147950e0e0c4d262 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:51 +0800 Subject: ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 31 +++ Documentation/networking/ethtool-netlink.rst | 8 + Documentation/networking/net_dim.rst | 42 +++++ include/linux/dim.h | 58 ++++++ include/linux/ethtool.h | 4 +- include/linux/netdevice.h | 3 + include/uapi/linux/ethtool_netlink.h | 22 +++ lib/dim/net_dim.c | 70 +++++++ net/Kconfig | 1 + net/ethtool/coalesce.c | 273 ++++++++++++++++++++++++++- 10 files changed, 509 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 00dc61358be8..6c2ab3d1c22f 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -414,6 +414,26 @@ attribute-sets: name: combined-count type: u32 + - + name: irq-moderation + attributes: + - + name: usec + type: u32 + - + name: pkts + type: u32 + - + name: comps + type: u32 + - + name: profile + attributes: + - + name: irq-moderation + type: nest + multi-attr: true + nested-attributes: irq-moderation - name: coalesce attributes: @@ -502,6 +522,15 @@ attribute-sets: - name: tx-aggr-time-usecs type: u32 + - + name: rx-profile + type: nest + nested-attributes: profile + - + name: tx-profile + type: nest + nested-attributes: profile + - name: pause-stat attributes: @@ -1325,6 +1354,8 @@ operations: - tx-aggr-max-bytes - tx-aggr-max-frames - tx-aggr-time-usecs + - rx-profile + - tx-profile dump: *coalesce-get-op - name: coalesce-set diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 160bfb0ae8ba..7ec08e903bab 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1033,6 +1033,8 @@ Kernel response contents: ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES`` u32 max aggr size, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES`` u32 max aggr packets, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS`` u32 time (us), aggr, Tx + ``ETHTOOL_A_COALESCE_RX_PROFILE`` nested profile of DIM, Rx + ``ETHTOOL_A_COALESCE_TX_PROFILE`` nested profile of DIM, Tx =========================================== ====== ======================= Attributes are only included in reply if their value is not zero or the @@ -1062,6 +1064,10 @@ block should be sent. This feature is mainly of interest for specific USB devices which does not cope well with frequent small-sized URBs transmissions. +``ETHTOOL_A_COALESCE_RX_PROFILE`` and ``ETHTOOL_A_COALESCE_TX_PROFILE`` refer +to DIM parameters, see `Generic Network Dynamic Interrupt Moderation (Net DIM) +`_. + COALESCE_SET ============ @@ -1098,6 +1104,8 @@ Request contents: ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES`` u32 max aggr size, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES`` u32 max aggr packets, Tx ``ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS`` u32 time (us), aggr, Tx + ``ETHTOOL_A_COALESCE_RX_PROFILE`` nested profile of DIM, Rx + ``ETHTOOL_A_COALESCE_TX_PROFILE`` nested profile of DIM, Tx =========================================== ====== ======================= Request is rejected if it attributes declared as unsupported by driver (i.e. diff --git a/Documentation/networking/net_dim.rst b/Documentation/networking/net_dim.rst index 3bed9fd95336..8908fd7b0a8d 100644 --- a/Documentation/networking/net_dim.rst +++ b/Documentation/networking/net_dim.rst @@ -169,6 +169,48 @@ usage is not complete but it should make the outline of the usage clear. ... } + +Tuning DIM +========== + +Net DIM serves a range of network devices and delivers excellent acceleration +benefits. Yet, it has been observed that some preset configurations of DIM may +not align seamlessly with the varying specifications of network devices, and +this discrepancy has been identified as a factor to the suboptimal performance +outcomes of DIM-enabled network devices, related to a mismatch in profiles. + +To address this issue, Net DIM introduces a per-device control to modify and +access a device's ``rx-profile`` and ``tx-profile`` parameters: +Assume that the target network device is named ethx, and ethx only declares +support for RX profile setting and supports modification of ``usec`` field +and ``pkts`` field (See the data structure: +:c:type:`struct dim_cq_moder `). + +You can use ethtool to modify the current RX DIM profile where all +values are 64:: + + $ ethtool -C ethx rx-profile 1,1,n_2,2,n_3,n,n_n,4,n_n,n,n + +``n`` means do not modify this field, and ``_`` separates structure +elements of the profile array. + +Querying the current profiles using:: + + $ ethtool -c ethx + ... + rx-profile: + {.usec = 1, .pkts = 1, .comps = n/a,}, + {.usec = 2, .pkts = 2, .comps = n/a,}, + {.usec = 3, .pkts = 64, .comps = n/a,}, + {.usec = 64, .pkts = 4, .comps = n/a,}, + {.usec = 64, .pkts = 64, .comps = n/a,} + tx-profile: n/a + +If the network device does not support specific fields of DIM profiles, +the corresponding ``n/a`` will display. If the ``n/a`` field is being +modified, error messages will be reported. + + Dynamic Interrupt Moderation (DIM) library API ============================================== diff --git a/include/linux/dim.h b/include/linux/dim.h index 43398f5eade2..e0f39bd85432 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -10,6 +10,8 @@ #include #include +struct net_device; + /* Number of DIM profiles and period mode. */ #define NET_DIM_PARAMS_NUM_PROFILES 5 #define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 @@ -45,12 +47,45 @@ * @pkts: CQ packet counter suggestion (by DIM) * @comps: Completion counter * @cq_period_mode: CQ period count mode (from CQE/EQE) + * @rcu: for asynchronous kfree_rcu */ struct dim_cq_moder { u16 usec; u16 pkts; u16 comps; u8 cq_period_mode; + struct rcu_head rcu; +}; + +#define DIM_PROFILE_RX BIT(0) /* support rx profile modification */ +#define DIM_PROFILE_TX BIT(1) /* support tx profile modification */ + +#define DIM_COALESCE_USEC BIT(0) /* support usec field modification */ +#define DIM_COALESCE_PKTS BIT(1) /* support pkts field modification */ +#define DIM_COALESCE_COMPS BIT(2) /* support comps field modification */ + +/** + * struct dim_irq_moder - Structure for irq moderation information. + * Used to collect irq moderation related information. + * + * @profile_flags: DIM_PROFILE_* + * @coal_flags: DIM_COALESCE_* for Rx and Tx + * @dim_rx_mode: Rx DIM period count mode: CQE or EQE + * @dim_tx_mode: Tx DIM period count mode: CQE or EQE + * @rx_profile: DIM profile list for Rx + * @tx_profile: DIM profile list for Tx + * @rx_dim_work: Rx DIM worker scheduled by net_dim() + * @tx_dim_work: Tx DIM worker scheduled by net_dim() + */ +struct dim_irq_moder { + u8 profile_flags; + u8 coal_flags; + u8 dim_rx_mode; + u8 dim_tx_mode; + struct dim_cq_moder __rcu *rx_profile; + struct dim_cq_moder __rcu *tx_profile; + void (*rx_dim_work)(struct work_struct *work); + void (*tx_dim_work)(struct work_struct *work); }; /** @@ -198,6 +233,29 @@ enum dim_step_result { DIM_ON_EDGE, }; +/** + * net_dim_init_irq_moder - collect information to initialize irq moderation + * @dev: target network device + * @profile_flags: Rx or Tx profile modification capability + * @coal_flags: irq moderation params flags + * @rx_mode: CQ period mode for Rx + * @tx_mode: CQ period mode for Tx + * @rx_dim_work: Rx worker called after dim decision + * @tx_dim_work: Tx worker called after dim decision + * + * Return: 0 on success or a negative error code. + */ +int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, + u8 coal_flags, u8 rx_mode, u8 tx_mode, + void (*rx_dim_work)(struct work_struct *work), + void (*tx_dim_work)(struct work_struct *work)); + +/** + * net_dim_free_irq_moder - free fields for irq moderation + * @dev: target network device + */ +void net_dim_free_irq_moder(struct net_device *dev); + /** * dim_on_top - check if current state is a good place to stop (top location) * @dim: DIM context diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6fd9107d3cc0..959196af7f5a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -284,7 +284,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES BIT(24) #define ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES BIT(25) #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS BIT(26) -#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(26, 0) +#define ETHTOOL_COALESCE_RX_PROFILE BIT(27) +#define ETHTOOL_COALESCE_TX_PROFILE BIT(28) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(28, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4e81660b4462..cc18acd3c58b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2402,6 +2402,9 @@ struct net_device { /** @page_pools: page pools created for this netdevice */ struct hlist_head page_pools; #endif + + /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ + struct dim_irq_moder *irq_moder; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b49b804b9495..d15856c7e001 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -415,12 +415,34 @@ enum { ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_RX_PROFILE, + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_TX_PROFILE, /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; +enum { + ETHTOOL_A_PROFILE_UNSPEC, + /* nest, _A_IRQ_MODERATION_* */ + ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, + ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) +}; + +enum { + ETHTOOL_A_IRQ_MODERATION_UNSPEC, + ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + + __ETHTOOL_A_IRQ_MODERATION_CNT, + ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) +}; + /* PAUSE */ enum { diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 67d5beb34dc3..0cd41277c7a3 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -4,6 +4,7 @@ */ #include +#include /* * Net DIM profiles: @@ -95,6 +96,75 @@ net_dim_get_def_tx_moderation(u8 cq_period_mode) } EXPORT_SYMBOL(net_dim_get_def_tx_moderation); +int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, + u8 coal_flags, u8 rx_mode, u8 tx_mode, + void (*rx_dim_work)(struct work_struct *work), + void (*tx_dim_work)(struct work_struct *work)) +{ + struct dim_cq_moder *rxp = NULL, *txp; + struct dim_irq_moder *moder; + int len; + + dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL); + if (!dev->irq_moder) + return -ENOMEM; + + moder = dev->irq_moder; + len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile); + + moder->coal_flags = coal_flags; + moder->profile_flags = profile_flags; + + if (profile_flags & DIM_PROFILE_RX) { + moder->rx_dim_work = rx_dim_work; + moder->dim_rx_mode = rx_mode; + rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL); + if (!rxp) + goto free_moder; + + rcu_assign_pointer(moder->rx_profile, rxp); + } + + if (profile_flags & DIM_PROFILE_TX) { + moder->tx_dim_work = tx_dim_work; + moder->dim_tx_mode = tx_mode; + txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL); + if (!txp) + goto free_rxp; + + rcu_assign_pointer(moder->tx_profile, txp); + } + + return 0; + +free_rxp: + kfree(rxp); +free_moder: + kfree(moder); + return -ENOMEM; +} +EXPORT_SYMBOL(net_dim_init_irq_moder); + +/* RTNL lock is held. */ +void net_dim_free_irq_moder(struct net_device *dev) +{ + struct dim_cq_moder *rxp, *txp; + + if (!dev->irq_moder) + return; + + rxp = rtnl_dereference(dev->irq_moder->rx_profile); + txp = rtnl_dereference(dev->irq_moder->tx_profile); + + rcu_assign_pointer(dev->irq_moder->rx_profile, NULL); + rcu_assign_pointer(dev->irq_moder->tx_profile, NULL); + + kfree_rcu(rxp, rcu); + kfree_rcu(txp, rcu); + kfree(dev->irq_moder); +} +EXPORT_SYMBOL(net_dim_free_irq_moder); + static int net_dim_step(struct dim *dim) { if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) diff --git a/net/Kconfig b/net/Kconfig index 9fe65fa26e48..d27d0deac0bf 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -508,6 +508,7 @@ config FAILOVER config ETHTOOL_NETLINK bool "Netlink interface for ethtool" + select DIMLIB default y help An alternative userspace interface for ethtool based on generic diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 83112c1a71ae..759b16e3d134 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include "netlink.h" #include "common.h" @@ -82,6 +83,14 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base, static int coalesce_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { + int modersz = nla_total_size(0) + /* _PROFILE_IRQ_MODERATION, nest */ + nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_USEC */ + nla_total_size(sizeof(u32)) + /* _IRQ_MODERATION_PKTS */ + nla_total_size(sizeof(u32)); /* _IRQ_MODERATION_COMPS */ + + int total_modersz = nla_total_size(0) + /* _{R,T}X_PROFILE, nest */ + modersz * NET_DIM_PARAMS_NUM_PROFILES; + return nla_total_size(sizeof(u32)) + /* _RX_USECS */ nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES */ nla_total_size(sizeof(u32)) + /* _RX_USECS_IRQ */ @@ -108,7 +117,8 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_RX */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */ - nla_total_size(sizeof(u32)); /* _TX_AGGR_TIME_USECS */ + nla_total_size(sizeof(u32)) + /* _TX_AGGR_TIME_USECS */ + total_modersz * 2; /* _{R,T}X_PROFILE */ } static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val, @@ -127,14 +137,84 @@ static bool coalesce_put_bool(struct sk_buff *skb, u16 attr_type, u32 val, return nla_put_u8(skb, attr_type, !!val); } +/** + * coalesce_put_profile - fill reply with a nla nest with four child nla nests. + * @skb: socket buffer the message is stored in + * @attr_type: nest attr type ETHTOOL_A_COALESCE_*X_PROFILE + * @profile: data passed to userspace + * @coal_flags: modifiable parameters supported by the driver + * + * Put a dim profile nest attribute. Refer to ETHTOOL_A_PROFILE_IRQ_MODERATION. + * + * Return: 0 on success or a negative error code. + */ +static int coalesce_put_profile(struct sk_buff *skb, u16 attr_type, + const struct dim_cq_moder *profile, + u8 coal_flags) +{ + struct nlattr *profile_attr, *moder_attr; + int i, ret; + + if (!profile || !coal_flags) + return 0; + + profile_attr = nla_nest_start(skb, attr_type); + if (!profile_attr) + return -EMSGSIZE; + + for (i = 0; i < NET_DIM_PARAMS_NUM_PROFILES; i++) { + moder_attr = nla_nest_start(skb, + ETHTOOL_A_PROFILE_IRQ_MODERATION); + if (!moder_attr) { + ret = -EMSGSIZE; + goto cancel_profile; + } + + if (coal_flags & DIM_COALESCE_USEC) { + ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_USEC, + profile[i].usec); + if (ret) + goto cancel_moder; + } + + if (coal_flags & DIM_COALESCE_PKTS) { + ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_PKTS, + profile[i].pkts); + if (ret) + goto cancel_moder; + } + + if (coal_flags & DIM_COALESCE_COMPS) { + ret = nla_put_u32(skb, ETHTOOL_A_IRQ_MODERATION_COMPS, + profile[i].comps); + if (ret) + goto cancel_moder; + } + + nla_nest_end(skb, moder_attr); + } + + nla_nest_end(skb, profile_attr); + + return 0; + +cancel_moder: + nla_nest_cancel(skb, moder_attr); +cancel_profile: + nla_nest_cancel(skb, profile_attr); + return ret; +} + static int coalesce_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base); const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce; + struct dim_irq_moder *moder = req_base->dev->irq_moder; const struct ethtool_coalesce *coal = &data->coalesce; u32 supported = data->supported_params; + int ret = 0; if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS, coal->rx_coalesce_usecs, supported) || @@ -192,11 +272,41 @@ static int coalesce_fill_reply(struct sk_buff *skb, kcoal->tx_aggr_time_usecs, supported)) return -EMSGSIZE; - return 0; + if (!moder) + return 0; + + rcu_read_lock(); + if (moder->profile_flags & DIM_PROFILE_RX) { + ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_PROFILE, + rcu_dereference(moder->rx_profile), + moder->coal_flags); + if (ret) + goto out; + } + + if (moder->profile_flags & DIM_PROFILE_TX) + ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_TX_PROFILE, + rcu_dereference(moder->tx_profile), + moder->coal_flags); + +out: + rcu_read_unlock(); + return ret; } /* COALESCE_SET */ +static const struct nla_policy coalesce_irq_moderation_policy[] = { + [ETHTOOL_A_IRQ_MODERATION_USEC] = { .type = NLA_U32 }, + [ETHTOOL_A_IRQ_MODERATION_PKTS] = { .type = NLA_U32 }, + [ETHTOOL_A_IRQ_MODERATION_COMPS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy coalesce_profile_policy[] = { + [ETHTOOL_A_PROFILE_IRQ_MODERATION] = + NLA_POLICY_NESTED(coalesce_irq_moderation_policy), +}; + const struct nla_policy ethnl_coalesce_set_policy[] = { [ETHTOOL_A_COALESCE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), @@ -227,6 +337,10 @@ const struct nla_policy ethnl_coalesce_set_policy[] = { [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 }, + [ETHTOOL_A_COALESCE_RX_PROFILE] = + NLA_POLICY_NESTED(coalesce_profile_policy), + [ETHTOOL_A_COALESCE_TX_PROFILE] = + NLA_POLICY_NESTED(coalesce_profile_policy), }; static int @@ -234,6 +348,7 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; + struct dim_irq_moder *irq_moder = req_info->dev->irq_moder; struct nlattr **tb = info->attrs; u32 supported_params; u16 a; @@ -243,6 +358,12 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info, /* make sure that only supported parameters are present */ supported_params = ops->supported_coalesce_params; + if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_RX) + supported_params |= ETHTOOL_COALESCE_RX_PROFILE; + + if (irq_moder && irq_moder->profile_flags & DIM_PROFILE_TX) + supported_params |= ETHTOOL_COALESCE_TX_PROFILE; + for (a = ETHTOOL_A_COALESCE_RX_USECS; a < __ETHTOOL_A_COALESCE_CNT; a++) if (tb[a] && !(supported_params & attr_to_mask(a))) { NL_SET_ERR_MSG_ATTR(info->extack, tb[a], @@ -253,6 +374,138 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info, return 1; } +/** + * ethnl_update_irq_moder - update a specific field in the given profile + * @irq_moder: place that collects dim related information + * @irq_field: field in profile to modify + * @attr_type: attr type ETHTOOL_A_IRQ_MODERATION_* + * @tb: netlink attribute with new values or null + * @coal_bit: DIM_COALESCE_* bit from coal_flags + * @mod: pointer to bool for modification tracking + * @extack: netlink extended ack + * + * Return: 0 on success or a negative error code. + */ +static int ethnl_update_irq_moder(struct dim_irq_moder *irq_moder, + u16 *irq_field, u16 attr_type, + struct nlattr **tb, + u8 coal_bit, bool *mod, + struct netlink_ext_ack *extack) +{ + int ret = 0; + u32 val; + + if (!tb[attr_type]) + return 0; + + if (irq_moder->coal_flags & coal_bit) { + val = nla_get_u32(tb[attr_type]); + if (*irq_field == val) + return 0; + + *irq_field = val; + *mod = true; + } else { + NL_SET_BAD_ATTR(extack, tb[attr_type]); + ret = -EOPNOTSUPP; + } + + return ret; +} + +/** + * ethnl_update_profile - get a profile nest with child nests from userspace. + * @dev: netdevice to update the profile + * @dst: profile get from the driver and modified by ethnl_update_profile. + * @nests: nest attr ETHTOOL_A_COALESCE_*X_PROFILE to set profile. + * @mod: pointer to bool for modification tracking + * @extack: Netlink extended ack + * + * Layout of nests: + * Nested ETHTOOL_A_COALESCE_*X_PROFILE attr + * Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr + * ETHTOOL_A_IRQ_MODERATION_USEC attr + * ETHTOOL_A_IRQ_MODERATION_PKTS attr + * ETHTOOL_A_IRQ_MODERATION_COMPS attr + * ... + * Nested ETHTOOL_A_PROFILE_IRQ_MODERATION attr + * ETHTOOL_A_IRQ_MODERATION_USEC attr + * ETHTOOL_A_IRQ_MODERATION_PKTS attr + * ETHTOOL_A_IRQ_MODERATION_COMPS attr + * + * Return: 0 on success or a negative error code. + */ +static int ethnl_update_profile(struct net_device *dev, + struct dim_cq_moder __rcu **dst, + const struct nlattr *nests, + bool *mod, + struct netlink_ext_ack *extack) +{ + int len_irq_moder = ARRAY_SIZE(coalesce_irq_moderation_policy); + struct nlattr *tb[ARRAY_SIZE(coalesce_irq_moderation_policy)]; + struct dim_irq_moder *irq_moder = dev->irq_moder; + struct dim_cq_moder *new_profile, *old_profile; + int ret, rem, i = 0, len; + struct nlattr *nest; + + if (!nests) + return 0; + + if (!*dst) + return -EOPNOTSUPP; + + old_profile = rtnl_dereference(*dst); + len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*old_profile); + new_profile = kmemdup(old_profile, len, GFP_KERNEL); + if (!new_profile) + return -ENOMEM; + + nla_for_each_nested_type(nest, ETHTOOL_A_PROFILE_IRQ_MODERATION, + nests, rem) { + ret = nla_parse_nested(tb, len_irq_moder - 1, nest, + coalesce_irq_moderation_policy, + extack); + if (ret) + goto err_out; + + ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].usec, + ETHTOOL_A_IRQ_MODERATION_USEC, + tb, DIM_COALESCE_USEC, + mod, extack); + if (ret) + goto err_out; + + ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].pkts, + ETHTOOL_A_IRQ_MODERATION_PKTS, + tb, DIM_COALESCE_PKTS, + mod, extack); + if (ret) + goto err_out; + + ret = ethnl_update_irq_moder(irq_moder, &new_profile[i].comps, + ETHTOOL_A_IRQ_MODERATION_COMPS, + tb, DIM_COALESCE_COMPS, + mod, extack); + if (ret) + goto err_out; + + i++; + } + + /* After the profile is modified, dim itself is a dynamic + * mechanism and will quickly fit to the appropriate + * coalescing parameters according to the new profile. + */ + rcu_assign_pointer(*dst, new_profile); + kfree_rcu(old_profile, rcu); + + return 0; + +err_out: + kfree(new_profile); + return ret; +} + static int __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info, bool *dual_change) @@ -317,6 +570,22 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info, ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs, tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod); + if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_RX) { + ret = ethnl_update_profile(dev, &dev->irq_moder->rx_profile, + tb[ETHTOOL_A_COALESCE_RX_PROFILE], + &mod, info->extack); + if (ret < 0) + return ret; + } + + if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_TX) { + ret = ethnl_update_profile(dev, &dev->irq_moder->tx_profile, + tb[ETHTOOL_A_COALESCE_TX_PROFILE], + &mod, info->extack); + if (ret < 0) + return ret; + } + /* Update operation modes */ ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce, tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod_mode); -- cgit v1.2.3 From 13ba28c5cd047e272f9dbbeb5c62c403873d8987 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:52 +0800 Subject: dim: add new interfaces for initialization and getting results DIM-related mode and work have been collected in one same place, so new interfaces are added to provide convenience. Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-5-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 48 +++++++++++++++++++++++++++++++++++++ lib/dim/net_dim.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index e0f39bd85432..1b581ff25a15 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -256,6 +256,54 @@ int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, */ void net_dim_free_irq_moder(struct net_device *dev); +/** + * net_dim_setting - initialize DIM's cq mode and schedule worker + * @dev: target network device + * @dim: DIM context + * @is_tx: true indicates the tx direction, false indicates the rx direction + */ +void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx); + +/** + * net_dim_work_cancel - synchronously cancel dim's worker + * @dim: DIM context + */ +void net_dim_work_cancel(struct dim *dim); + +/** + * net_dim_get_rx_irq_moder - get DIM rx results based on profile_ix + * @dev: target network device + * @dim: DIM context + * + * Return: DIM irq moderation + */ +struct dim_cq_moder +net_dim_get_rx_irq_moder(struct net_device *dev, struct dim *dim); + +/** + * net_dim_get_tx_irq_moder - get DIM tx results based on profile_ix + * @dev: target network device + * @dim: DIM context + * + * Return: DIM irq moderation + */ +struct dim_cq_moder +net_dim_get_tx_irq_moder(struct net_device *dev, struct dim *dim); + +/** + * net_dim_set_rx_mode - set DIM rx cq mode + * @dev: target network device + * @rx_mode: target rx cq mode + */ +void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode); + +/** + * net_dim_set_tx_mode - set DIM tx cq mode + * @dev: target network device + * @tx_mode: target tx cq mode + */ +void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode); + /** * dim_on_top - check if current state is a good place to stop (top location) * @dim: DIM context diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c index 0cd41277c7a3..d7e7028e9b19 100644 --- a/lib/dim/net_dim.c +++ b/lib/dim/net_dim.c @@ -165,6 +165,74 @@ void net_dim_free_irq_moder(struct net_device *dev) } EXPORT_SYMBOL(net_dim_free_irq_moder); +void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx) +{ + struct dim_irq_moder *irq_moder = dev->irq_moder; + + if (!irq_moder) + return; + + if (is_tx) { + INIT_WORK(&dim->work, irq_moder->tx_dim_work); + dim->mode = READ_ONCE(irq_moder->dim_tx_mode); + return; + } + + INIT_WORK(&dim->work, irq_moder->rx_dim_work); + dim->mode = READ_ONCE(irq_moder->dim_rx_mode); +} +EXPORT_SYMBOL(net_dim_setting); + +void net_dim_work_cancel(struct dim *dim) +{ + cancel_work_sync(&dim->work); +} +EXPORT_SYMBOL(net_dim_work_cancel); + +struct dim_cq_moder net_dim_get_rx_irq_moder(struct net_device *dev, + struct dim *dim) +{ + struct dim_cq_moder res, *profile; + + rcu_read_lock(); + profile = rcu_dereference(dev->irq_moder->rx_profile); + res = profile[dim->profile_ix]; + rcu_read_unlock(); + + res.cq_period_mode = dim->mode; + + return res; +} +EXPORT_SYMBOL(net_dim_get_rx_irq_moder); + +struct dim_cq_moder net_dim_get_tx_irq_moder(struct net_device *dev, + struct dim *dim) +{ + struct dim_cq_moder res, *profile; + + rcu_read_lock(); + profile = rcu_dereference(dev->irq_moder->tx_profile); + res = profile[dim->profile_ix]; + rcu_read_unlock(); + + res.cq_period_mode = dim->mode; + + return res; +} +EXPORT_SYMBOL(net_dim_get_tx_irq_moder); + +void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode) +{ + WRITE_ONCE(dev->irq_moder->dim_rx_mode, rx_mode); +} +EXPORT_SYMBOL(net_dim_set_rx_mode); + +void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode) +{ + WRITE_ONCE(dev->irq_moder->dim_tx_mode, tx_mode); +} +EXPORT_SYMBOL(net_dim_set_tx_mode); + static int net_dim_step(struct dim *dim) { if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2)) -- cgit v1.2.3 From eee5528890d54b22b46f833002355a5ee94c3bb4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 24 Jun 2024 08:55:01 +0900 Subject: PCI: Add Edimax Vendor ID to pci_ids.h Add the Edimax Vendor ID (0x1432) for an ethernet driver for Tehuti Networks TN40xx chips. This ID can be used for Realtek 8180 and Ralink rt28xx wireless drivers. Signed-off-by: FUJITA Tomonori Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20240623235507.108147-2-fujita.tomonori@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 942a587bb97e..677aea20d3e1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2126,6 +2126,8 @@ #define PCI_VENDOR_ID_CHELSIO 0x1425 +#define PCI_VENDOR_ID_EDIMAX 0x1432 + #define PCI_VENDOR_ID_ADLINK 0x144a #define PCI_VENDOR_ID_SAMSUNG 0x144d -- cgit v1.2.3 From 1decf05d0f4de78ef67dc3f794709258c689e09e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Jun 2024 19:25:56 +0300 Subject: wifi: mac80211: inform the low level if drv_stop() is a suspend This will allow the low level driver to take different actions for different flows. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240618192529.739036208b6e.Ie18a2fe8e02bf2717549d39420b350cfdaf3d317@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/admtek/adm8211.c | 2 +- drivers/net/wireless/ath/ar5523/ar5523.c | 2 +- drivers/net/wireless/ath/ath10k/mac.c | 2 +- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 2 +- drivers/net/wireless/ath/ath5k/base.c | 2 +- drivers/net/wireless/ath/ath5k/base.h | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/carl9170/main.c | 2 +- drivers/net/wireless/ath/wcn36xx/main.c | 2 +- drivers/net/wireless/atmel/at76c50x-usb.c | 2 +- drivers/net/wireless/broadcom/b43/main.c | 2 +- drivers/net/wireless/broadcom/b43legacy/main.c | 2 +- .../wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c | 2 +- drivers/net/wireless/intel/iwlegacy/3945-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 2 +- drivers/net/wireless/intel/iwlegacy/4965.h | 2 +- drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/intersil/p54/main.c | 2 +- drivers/net/wireless/marvell/libertas_tf/main.c | 2 +- drivers/net/wireless/marvell/mwl8k.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7921/main.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt792x.h | 4 ++-- drivers/net/wireless/mediatek/mt76/mt792x_core.c | 2 +- drivers/net/wireless/mediatek/mt76/mt792x_usb.c | 4 ++-- drivers/net/wireless/mediatek/mt76/mt7996/main.c | 2 +- drivers/net/wireless/mediatek/mt7601u/main.c | 2 +- drivers/net/wireless/purelifi/plfxlc/mac.c | 2 +- drivers/net/wireless/purelifi/plfxlc/mac.h | 2 +- drivers/net/wireless/purelifi/plfxlc/usb.c | 4 ++-- drivers/net/wireless/ralink/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/ralink/rt2x00/rt2x00mac.c | 2 +- drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c | 2 +- drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c | 2 +- drivers/net/wireless/realtek/rtl8xxxu/core.c | 2 +- drivers/net/wireless/realtek/rtlwifi/core.c | 4 ++-- drivers/net/wireless/realtek/rtw88/mac80211.c | 2 +- drivers/net/wireless/realtek/rtw89/mac80211.c | 2 +- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 3 ++- drivers/net/wireless/silabs/wfx/sta.c | 2 +- drivers/net/wireless/silabs/wfx/sta.h | 2 +- drivers/net/wireless/st/cw1200/sta.c | 2 +- drivers/net/wireless/st/cw1200/sta.h | 2 +- drivers/net/wireless/ti/wl1251/main.c | 2 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/net/wireless/virtual/mac80211_hwsim.c | 2 +- drivers/net/wireless/zydas/zd1211rw/zd_mac.c | 2 +- drivers/net/wireless/zydas/zd1211rw/zd_mac.h | 2 +- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- drivers/staging/vt6655/device_main.c | 2 +- drivers/staging/vt6656/main_usb.c | 2 +- include/net/mac80211.h | 2 +- net/mac80211/driver-ops.c | 6 +++--- net/mac80211/driver-ops.h | 2 +- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 4 ++-- net/mac80211/pm.c | 4 ++-- net/mac80211/trace.h | 15 ++++++++++++--- net/mac80211/util.c | 4 ++-- 70 files changed, 93 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/admtek/adm8211.c b/drivers/net/wireless/admtek/adm8211.c index e3fd48dd3909..a2d87c3ad196 100644 --- a/drivers/net/wireless/admtek/adm8211.c +++ b/drivers/net/wireless/admtek/adm8211.c @@ -1550,7 +1550,7 @@ fail: return retval; } -static void adm8211_stop(struct ieee80211_hw *dev) +static void adm8211_stop(struct ieee80211_hw *dev, bool suspend) { struct adm8211_priv *priv = dev->priv; diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 5a55db349cb5..156f3650c006 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -1061,7 +1061,7 @@ err: return error; } -static void ar5523_stop(struct ieee80211_hw *hw) +static void ar5523_stop(struct ieee80211_hw *hw, bool suspend) { struct ar5523 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 3bf67b2ecd6d..a5da32e87106 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -5363,7 +5363,7 @@ err: return ret; } -static void ath10k_stop(struct ieee80211_hw *hw) +static void ath10k_stop(struct ieee80211_hw *hw, bool suspend) { struct ath10k *ar = hw->priv; u32 opt; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index a1800c75d32b..c9a13b88c804 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -6278,7 +6278,7 @@ err: return ret; } -static void ath11k_mac_op_stop(struct ieee80211_hw *hw) +static void ath11k_mac_op_stop(struct ieee80211_hw *hw, bool suspend) { struct ath11k *ar = hw->priv; struct htt_ppdu_stats_info *ppdu_stats, *tmp; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 509c02bffdae..bd3e021e2a81 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6112,7 +6112,7 @@ static void ath12k_mac_stop(struct ath12k *ar) atomic_set(&ar->num_pending_mgmt_tx, 0); } -static void ath12k_mac_op_stop(struct ieee80211_hw *hw) +static void ath12k_mac_op_stop(struct ieee80211_hw *hw, bool suspend) { struct ath12k_hw *ah = ath12k_hw_to_ah(hw); struct ath12k *ar; diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 9f534ed2fbb3..abe41330fb69 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -2847,7 +2847,7 @@ static void ath5k_stop_tasklets(struct ath5k_hw *ah) * if another thread does a system call and the thread doing the * stop is preempted). */ -void ath5k_stop(struct ieee80211_hw *hw) +void ath5k_stop(struct ieee80211_hw *hw, bool suspend) { struct ath5k_hw *ah = hw->priv; int ret; diff --git a/drivers/net/wireless/ath/ath5k/base.h b/drivers/net/wireless/ath/ath5k/base.h index 97469d0fbad7..594e5b945cb7 100644 --- a/drivers/net/wireless/ath/ath5k/base.h +++ b/drivers/net/wireless/ath/ath5k/base.h @@ -92,7 +92,7 @@ void ath5k_vif_iter(void *data, u8 *mac, struct ieee80211_vif *vif); bool ath5k_any_vif_assoc(struct ath5k_hw *ah); int ath5k_start(struct ieee80211_hw *hw); -void ath5k_stop(struct ieee80211_hw *hw); +void ath5k_stop(struct ieee80211_hw *hw, bool suspend); void ath5k_beacon_update_timers(struct ath5k_hw *ah, u64 bc_tsf); int ath5k_beacon_update(struct ieee80211_hw *hw, struct ieee80211_vif *vif); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index b389e19381c4..8a03bcc2789e 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -973,7 +973,7 @@ static int ath9k_htc_start(struct ieee80211_hw *hw) return ret; } -static void ath9k_htc_stop(struct ieee80211_hw *hw) +static void ath9k_htc_stop(struct ieee80211_hw *hw, bool suspend) { struct ath9k_htc_priv *priv = hw->priv; struct ath_hw *ah = priv->ah; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 01173aac3045..b92c89dad8de 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -895,7 +895,7 @@ static void ath9k_pending_key_del(struct ath_softc *sc, u8 keyix) ath_key_delete(common, keyix); } -static void ath9k_stop(struct ieee80211_hw *hw) +static void ath9k_stop(struct ieee80211_hw *hw, bool suspend) { struct ath_softc *sc = hw->priv; struct ath_hw *ah = sc->sc_ah; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 7e7797bf44b7..755c068e4197 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -439,7 +439,7 @@ static void carl9170_cancel_worker(struct ar9170 *ar) cancel_work_sync(&ar->ampdu_work); } -static void carl9170_op_stop(struct ieee80211_hw *hw) +static void carl9170_op_stop(struct ieee80211_hw *hw, bool suspend) { struct ar9170 *ar = hw->priv; diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index e760d8002e09..408776562a7e 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -278,7 +278,7 @@ out_err: return ret; } -static void wcn36xx_stop(struct ieee80211_hw *hw) +static void wcn36xx_stop(struct ieee80211_hw *hw, bool suspend) { struct wcn36xx *wcn = hw->priv; diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index baa53cfefe48..504e05ea30f2 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -1850,7 +1850,7 @@ error: return 0; } -static void at76_mac80211_stop(struct ieee80211_hw *hw) +static void at76_mac80211_stop(struct ieee80211_hw *hw, bool suspend) { struct at76_priv *priv = hw->priv; diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c index badb2f494035..8e56dcf9309d 100644 --- a/drivers/net/wireless/broadcom/b43/main.c +++ b/drivers/net/wireless/broadcom/b43/main.c @@ -5078,7 +5078,7 @@ static int b43_op_start(struct ieee80211_hw *hw) return err; } -static void b43_op_stop(struct ieee80211_hw *hw) +static void b43_op_stop(struct ieee80211_hw *hw, bool suspend) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev = wl->current_dev; diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index 18eb610f600a..441d6440671b 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -3485,7 +3485,7 @@ out_mutex_unlock: return err; } -static void b43legacy_op_stop(struct ieee80211_hw *hw) +static void b43legacy_op_stop(struct ieee80211_hw *hw, bool suspend) { struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); struct b43legacy_wldev *dev = wl->current_dev; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 860ef9c11c46..9ac6f3c4bdc7 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -457,7 +457,7 @@ static int brcms_ops_start(struct ieee80211_hw *hw) return err; } -static void brcms_ops_stop(struct ieee80211_hw *hw) +static void brcms_ops_stop(struct ieee80211_hw *hw, bool suspend) { struct brcms_info *wl = hw->priv; int status; diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 075b705a8d7b..74fc76c00ebc 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -2813,7 +2813,7 @@ out_release_irq: } static void -il3945_mac_stop(struct ieee80211_hw *hw) +il3945_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct il_priv *il = hw->priv; diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index d018f56be966..1600c344edbb 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -5820,7 +5820,7 @@ out: } void -il4965_mac_stop(struct ieee80211_hw *hw) +il4965_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct il_priv *il = hw->priv; diff --git a/drivers/net/wireless/intel/iwlegacy/4965.h b/drivers/net/wireless/intel/iwlegacy/4965.h index 863e3792d153..951f2245fefb 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965.h +++ b/drivers/net/wireless/intel/iwlegacy/4965.h @@ -151,7 +151,7 @@ void il4965_mac_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb); int il4965_mac_start(struct ieee80211_hw *hw); -void il4965_mac_stop(struct ieee80211_hw *hw); +void il4965_mac_stop(struct ieee80211_hw *hw, bool suspend); void il4965_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, u64 multicast); diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c index 826f129ecefa..74d163e56511 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/mac80211.c @@ -300,7 +300,7 @@ static int iwlagn_mac_start(struct ieee80211_hw *hw) return ret; } -static void iwlagn_mac_stop(struct ieee80211_hw *hw) +static void iwlagn_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 81103a974b20..896e560f5a82 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1376,7 +1376,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm) } } -void iwl_mvm_mac_stop(struct ieee80211_hw *hw) +void iwl_mvm_mac_stop(struct ieee80211_hw *hw, bool suspend) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index bce517effe55..54c3553db219 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -2818,7 +2818,7 @@ int iwl_mvm_op_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); int iwl_mvm_mac_start(struct ieee80211_hw *hw); void iwl_mvm_mac_reconfig_complete(struct ieee80211_hw *hw, enum ieee80211_reconfig_type reconfig_type); -void iwl_mvm_mac_stop(struct ieee80211_hw *hw); +void iwl_mvm_mac_stop(struct ieee80211_hw *hw, bool suspend); static inline int iwl_mvm_mac_config(struct ieee80211_hw *hw, u32 changed) { return 0; diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c index 687841b2fa2a..42111bb53f58 100644 --- a/drivers/net/wireless/intersil/p54/main.c +++ b/drivers/net/wireless/intersil/p54/main.c @@ -197,7 +197,7 @@ out: return err; } -static void p54_stop(struct ieee80211_hw *dev) +static void p54_stop(struct ieee80211_hw *dev, bool suspend) { struct p54_common *priv = dev->priv; int i; diff --git a/drivers/net/wireless/marvell/libertas_tf/main.c b/drivers/net/wireless/marvell/libertas_tf/main.c index 9cca69fe04d7..b47a832b9ae2 100644 --- a/drivers/net/wireless/marvell/libertas_tf/main.c +++ b/drivers/net/wireless/marvell/libertas_tf/main.c @@ -267,7 +267,7 @@ static int lbtf_op_start(struct ieee80211_hw *hw) return 0; } -static void lbtf_op_stop(struct ieee80211_hw *hw) +static void lbtf_op_stop(struct ieee80211_hw *hw, bool suspend) { struct lbtf_private *priv = hw->priv; unsigned long flags; diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 241a02a0accd..b130e057370f 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -4768,7 +4768,7 @@ static int mwl8k_start(struct ieee80211_hw *hw) return rc; } -static void mwl8k_stop(struct ieee80211_hw *hw) +static void mwl8k_stop(struct ieee80211_hw *hw, bool suspend) { struct mwl8k_priv *priv = hw->priv; int i; @@ -6023,7 +6023,7 @@ static int mwl8k_reload_firmware(struct ieee80211_hw *hw, char *fw_image) struct mwl8k_priv *priv = hw->priv; struct mwl8k_vif *vif, *tmp_vif; - mwl8k_stop(hw); + mwl8k_stop(hw, false); mwl8k_rxq_deinit(hw, 0); /* diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 9b49267b1eab..f35fa643c0da 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -23,7 +23,7 @@ mt7603_start(struct ieee80211_hw *hw) } static void -mt7603_stop(struct ieee80211_hw *hw) +mt7603_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7603_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index c27acaf0eb1c..50e262c1622f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -91,7 +91,7 @@ out: return ret; } -static void mt7615_stop(struct ieee80211_hw *hw) +static void mt7615_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7615_dev *dev = mt7615_hw_dev(hw); struct mt7615_phy *phy = mt7615_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c index df737e1ff27b..9335ca0776fe 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c @@ -79,7 +79,7 @@ static void mt7663u_copy(struct mt76_dev *dev, u32 offset, mutex_unlock(&usb->usb_ctrl_mtx); } -static void mt7663u_stop(struct ieee80211_hw *hw) +static void mt7663u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7615_phy *phy = mt7615_hw_phy(hw); struct mt7615_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c index 79b7996ad1a8..2ecee7c5c80d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c @@ -44,7 +44,7 @@ static void mt76x0e_stop_hw(struct mt76x02_dev *dev) mt76_clear(dev, MT_WPDMA_GLO_CFG, MT_WPDMA_GLO_CFG_RX_DMA_EN); } -static void mt76x0e_stop(struct ieee80211_hw *hw) +static void mt76x0e_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index bba44f289b4e..390f502e97f0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -77,7 +77,7 @@ static void mt76x0u_cleanup(struct mt76x02_dev *dev) mt76u_queues_deinit(&dev->mt76); } -static void mt76x0u_stop(struct ieee80211_hw *hw) +static void mt76x0u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c index bfc8c69f43fa..6accea551319 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c @@ -24,7 +24,7 @@ mt76x2_start(struct ieee80211_hw *hw) } static void -mt76x2_stop(struct ieee80211_hw *hw) +mt76x2_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c index 9fe390fdd730..ba0241c36672 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_main.c @@ -22,7 +22,7 @@ static int mt76x2u_start(struct ieee80211_hw *hw) return 0; } -static void mt76x2u_stop(struct ieee80211_hw *hw) +static void mt76x2u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt76x02_dev *dev = hw->priv; diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c index 2624edbb59a1..049223df9beb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c @@ -108,7 +108,7 @@ static int mt7915_start(struct ieee80211_hw *hw) return ret; } -static void mt7915_stop(struct ieee80211_hw *hw) +static void mt7915_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7915_dev *dev = mt7915_hw_dev(hw); struct mt7915_phy *phy = mt7915_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 3e3ad3518d85..4f30426afbb7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -268,7 +268,7 @@ static int mt7921_start(struct ieee80211_hw *hw) return err; } -static void mt7921_stop(struct ieee80211_hw *hw) +static void mt7921_stop(struct ieee80211_hw *hw, bool suspend) { struct mt792x_dev *dev = mt792x_hw_dev(hw); int err = 0; @@ -281,7 +281,7 @@ static void mt7921_stop(struct ieee80211_hw *hw) return; } - mt792x_stop(hw); + mt792x_stop(hw, false); } static int diff --git a/drivers/net/wireless/mediatek/mt76/mt792x.h b/drivers/net/wireless/mediatek/mt76/mt792x.h index 20578497a405..cf14a38c5e72 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x.h +++ b/drivers/net/wireless/mediatek/mt76/mt792x.h @@ -251,7 +251,7 @@ static inline bool mt792x_dma_need_reinit(struct mt792x_dev *dev) #define mt792x_mutex_release(dev) \ mt76_connac_mutex_release(&(dev)->mt76, &(dev)->pm) -void mt792x_stop(struct ieee80211_hw *hw); +void mt792x_stop(struct ieee80211_hw *hw, bool suspend); void mt792x_pm_wake_work(struct work_struct *work); void mt792x_pm_power_save_work(struct work_struct *work); void mt792x_reset(struct mt76_dev *mdev); @@ -368,7 +368,7 @@ void mt792xu_wr(struct mt76_dev *dev, u32 addr, u32 val); u32 mt792xu_rmw(struct mt76_dev *dev, u32 addr, u32 mask, u32 val); void mt792xu_copy(struct mt76_dev *dev, u32 offset, const void *data, int len); void mt792xu_disconnect(struct usb_interface *usb_intf); -void mt792xu_stop(struct ieee80211_hw *hw); +void mt792xu_stop(struct ieee80211_hw *hw, bool suspend); static inline void mt792x_skb_add_usb_sdio_hdr(struct mt792x_dev *dev, struct sk_buff *skb, diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c index a405af8d9052..4adca99eb9b8 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c @@ -91,7 +91,7 @@ void mt792x_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, } EXPORT_SYMBOL_GPL(mt792x_tx); -void mt792x_stop(struct ieee80211_hw *hw) +void mt792x_stop(struct ieee80211_hw *hw, bool suspend) { struct mt792x_dev *dev = mt792x_hw_dev(hw); struct mt792x_phy *phy = mt792x_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c index b49668a4b784..76272a03b22e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c @@ -285,12 +285,12 @@ int mt792xu_init_reset(struct mt792x_dev *dev) } EXPORT_SYMBOL_GPL(mt792xu_init_reset); -void mt792xu_stop(struct ieee80211_hw *hw) +void mt792xu_stop(struct ieee80211_hw *hw, bool suspend) { struct mt792x_dev *dev = mt792x_hw_dev(hw); mt76u_stop_tx(&dev->mt76); - mt792x_stop(hw); + mt792x_stop(hw, false); } EXPORT_SYMBOL_GPL(mt792xu_stop); diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c index 7c97140d8255..bce082038219 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c @@ -93,7 +93,7 @@ static int mt7996_start(struct ieee80211_hw *hw) return ret; } -static void mt7996_stop(struct ieee80211_hw *hw) +static void mt7996_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7996_dev *dev = mt7996_hw_dev(hw); struct mt7996_phy *phy = mt7996_hw_phy(hw); diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index a7330576486b..7570c6ceecea 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -28,7 +28,7 @@ out: return ret; } -static void mt7601u_stop(struct ieee80211_hw *hw) +static void mt7601u_stop(struct ieee80211_hw *hw, bool suspend) { struct mt7601u_dev *dev = hw->priv; diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.c b/drivers/net/wireless/purelifi/plfxlc/mac.c index 641f847d47ab..eae93efa6150 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.c +++ b/drivers/net/wireless/purelifi/plfxlc/mac.c @@ -111,7 +111,7 @@ int plfxlc_op_start(struct ieee80211_hw *hw) return 0; } -void plfxlc_op_stop(struct ieee80211_hw *hw) +void plfxlc_op_stop(struct ieee80211_hw *hw, bool suspend) { struct plfxlc_mac *mac = plfxlc_hw_mac(hw); diff --git a/drivers/net/wireless/purelifi/plfxlc/mac.h b/drivers/net/wireless/purelifi/plfxlc/mac.h index 49b92413729b..9384acddcf26 100644 --- a/drivers/net/wireless/purelifi/plfxlc/mac.h +++ b/drivers/net/wireless/purelifi/plfxlc/mac.h @@ -178,7 +178,7 @@ int plfxlc_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, void plfxlc_mac_tx_failed(struct urb *urb); void plfxlc_mac_tx_to_dev(struct sk_buff *skb, int error); int plfxlc_op_start(struct ieee80211_hw *hw); -void plfxlc_op_stop(struct ieee80211_hw *hw); +void plfxlc_op_stop(struct ieee80211_hw *hw, bool suspend); int plfxlc_restore_settings(struct plfxlc_mac *mac); #endif /* PLFXLC_MAC_H */ diff --git a/drivers/net/wireless/purelifi/plfxlc/usb.c b/drivers/net/wireless/purelifi/plfxlc/usb.c index 311676c1ece0..15334940287d 100644 --- a/drivers/net/wireless/purelifi/plfxlc/usb.c +++ b/drivers/net/wireless/purelifi/plfxlc/usb.c @@ -408,7 +408,7 @@ void plfxlc_usb_init(struct plfxlc_usb *usb, struct ieee80211_hw *hw, void plfxlc_usb_release(struct plfxlc_usb *usb) { - plfxlc_op_stop(plfxlc_usb_to_hw(usb)); + plfxlc_op_stop(plfxlc_usb_to_hw(usb), false); plfxlc_usb_disable_tx(usb); plfxlc_usb_disable_rx(usb); usb_set_intfdata(usb->intf, NULL); @@ -761,7 +761,7 @@ static void plfxlc_usb_resume(struct plfxlc_usb *usb) static void plfxlc_usb_stop(struct plfxlc_usb *usb) { - plfxlc_op_stop(plfxlc_usb_to_hw(usb)); + plfxlc_op_stop(plfxlc_usb_to_hw(usb), false); plfxlc_usb_disable_tx(usb); plfxlc_usb_disable_rx(usb); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h index bb648f95dfdd..dfb4bb370f01 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h @@ -1450,7 +1450,7 @@ void rt2x00mac_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb); int rt2x00mac_start(struct ieee80211_hw *hw); -void rt2x00mac_stop(struct ieee80211_hw *hw); +void rt2x00mac_stop(struct ieee80211_hw *hw, bool suspend); void rt2x00mac_reconfig_complete(struct ieee80211_hw *hw, enum ieee80211_reconfig_type reconfig_type); int rt2x00mac_add_interface(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c index 75fda72c14ca..451632488805 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c @@ -178,7 +178,7 @@ int rt2x00mac_start(struct ieee80211_hw *hw) } EXPORT_SYMBOL_GPL(rt2x00mac_start); -void rt2x00mac_stop(struct ieee80211_hw *hw) +void rt2x00mac_stop(struct ieee80211_hw *hw, bool suspend) { struct rt2x00_dev *rt2x00dev = hw->priv; diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c index 77b6cb7e1f6b..ded8d4d59289 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8180/dev.c @@ -1249,7 +1249,7 @@ static int rtl8180_start(struct ieee80211_hw *dev) return ret; } -static void rtl8180_stop(struct ieee80211_hw *dev) +static void rtl8180_stop(struct ieee80211_hw *dev, bool suspend) { struct rtl8180_priv *priv = dev->priv; u8 reg; diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index 78d99afa373d..220ac5bdf279 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -1019,7 +1019,7 @@ rtl8187_start_exit: return ret; } -static void rtl8187_stop(struct ieee80211_hw *dev) +static void rtl8187_stop(struct ieee80211_hw *dev, bool suspend) { struct rtl8187_priv *priv = dev->priv; struct sk_buff *skb; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index 89a841b4e8d5..3685dbefc9bd 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -7521,7 +7521,7 @@ error_out: return ret; } -static void rtl8xxxu_stop(struct ieee80211_hw *hw) +static void rtl8xxxu_stop(struct ieee80211_hw *hw, bool suspend) { struct rtl8xxxu_priv *priv = hw->priv; unsigned long flags; diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 42b7db12b1bd..7537f04b1930 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -144,7 +144,7 @@ static int rtl_op_start(struct ieee80211_hw *hw) return err; } -static void rtl_op_stop(struct ieee80211_hw *hw) +static void rtl_op_stop(struct ieee80211_hw *hw, bool suspend) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -547,7 +547,7 @@ static int rtl_op_suspend(struct ieee80211_hw *hw, rtlhal->enter_pnp_sleep = true; rtl_lps_leave(hw, true); - rtl_op_stop(hw); + rtl_op_stop(hw, false); device_set_wakeup_enable(wiphy_dev(hw->wiphy), true); return 0; } diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c index 0acebbfa13c4..63326b352738 100644 --- a/drivers/net/wireless/realtek/rtw88/mac80211.c +++ b/drivers/net/wireless/realtek/rtw88/mac80211.c @@ -62,7 +62,7 @@ static int rtw_ops_start(struct ieee80211_hw *hw) return ret; } -static void rtw_ops_stop(struct ieee80211_hw *hw) +static void rtw_ops_stop(struct ieee80211_hw *hw, bool suspend) { struct rtw_dev *rtwdev = hw->priv; diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 41b286da3d59..722d09e9fbb5 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -66,7 +66,7 @@ static int rtw89_ops_start(struct ieee80211_hw *hw) return ret; } -static void rtw89_ops_stop(struct ieee80211_hw *hw) +static void rtw89_ops_stop(struct ieee80211_hw *hw, bool suspend) { struct rtw89_dev *rtwdev = hw->priv; diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 211fa25b9a78..3425a473b9a1 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -410,10 +410,11 @@ static int rsi_mac80211_start(struct ieee80211_hw *hw) /** * rsi_mac80211_stop() - This is the last handler that 802.11 module calls. * @hw: Pointer to the ieee80211_hw structure. + * @suspend: true if the this was called from suspend flow. * * Return: None. */ -static void rsi_mac80211_stop(struct ieee80211_hw *hw) +static void rsi_mac80211_stop(struct ieee80211_hw *hw, bool suspend) { struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; diff --git a/drivers/net/wireless/silabs/wfx/sta.c b/drivers/net/wireless/silabs/wfx/sta.c index a904602f02ce..216d43c8bd6e 100644 --- a/drivers/net/wireless/silabs/wfx/sta.c +++ b/drivers/net/wireless/silabs/wfx/sta.c @@ -805,7 +805,7 @@ int wfx_start(struct ieee80211_hw *hw) return 0; } -void wfx_stop(struct ieee80211_hw *hw) +void wfx_stop(struct ieee80211_hw *hw, bool suspend) { struct wfx_dev *wdev = hw->priv; diff --git a/drivers/net/wireless/silabs/wfx/sta.h b/drivers/net/wireless/silabs/wfx/sta.h index c478ddcb934b..7817c7c6f3dd 100644 --- a/drivers/net/wireless/silabs/wfx/sta.h +++ b/drivers/net/wireless/silabs/wfx/sta.h @@ -20,7 +20,7 @@ struct wfx_sta_priv { /* mac80211 interface */ int wfx_start(struct ieee80211_hw *hw); -void wfx_stop(struct ieee80211_hw *hw); +void wfx_stop(struct ieee80211_hw *hw, bool suspend); int wfx_config(struct ieee80211_hw *hw, u32 changed); int wfx_set_rts_threshold(struct ieee80211_hw *hw, u32 value); void wfx_set_default_unicast_key(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int idx); diff --git a/drivers/net/wireless/st/cw1200/sta.c b/drivers/net/wireless/st/cw1200/sta.c index 8ef1d06b9bbd..c259da8161e4 100644 --- a/drivers/net/wireless/st/cw1200/sta.c +++ b/drivers/net/wireless/st/cw1200/sta.c @@ -90,7 +90,7 @@ out: return ret; } -void cw1200_stop(struct ieee80211_hw *dev) +void cw1200_stop(struct ieee80211_hw *dev, bool suspend) { struct cw1200_common *priv = dev->priv; LIST_HEAD(list); diff --git a/drivers/net/wireless/st/cw1200/sta.h b/drivers/net/wireless/st/cw1200/sta.h index a49f187c7049..b955b92cfd73 100644 --- a/drivers/net/wireless/st/cw1200/sta.h +++ b/drivers/net/wireless/st/cw1200/sta.h @@ -13,7 +13,7 @@ /* mac80211 API */ int cw1200_start(struct ieee80211_hw *dev); -void cw1200_stop(struct ieee80211_hw *dev); +void cw1200_stop(struct ieee80211_hw *dev, bool suspend); int cw1200_add_interface(struct ieee80211_hw *dev, struct ieee80211_vif *vif); void cw1200_remove_interface(struct ieee80211_hw *dev, diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 0da2d29dd7bd..bb53d681c11b 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -415,7 +415,7 @@ out: return ret; } -static void wl1251_op_stop(struct ieee80211_hw *hw) +static void wl1251_op_stop(struct ieee80211_hw *hw, bool suspend) { struct wl1251 *wl = hw->priv; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 35d1114a28aa..7e1d30f89855 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2085,7 +2085,7 @@ static void wlcore_op_stop_locked(struct wl1271 *wl) memset(wl->reg_ch_conf_last, 0, sizeof(wl->reg_ch_conf_last)); } -static void wlcore_op_stop(struct ieee80211_hw *hw) +static void wlcore_op_stop(struct ieee80211_hw *hw, bool suspend) { struct wl1271 *wl = hw->priv; diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index fbf24870209d..8491eb32f760 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -2098,7 +2098,7 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw) } -static void mac80211_hwsim_stop(struct ieee80211_hw *hw) +static void mac80211_hwsim_stop(struct ieee80211_hw *hw, bool suspend) { struct mac80211_hwsim_data *data = hw->priv; int i; diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c index 900c063bd724..f90c33d19b39 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.c @@ -326,7 +326,7 @@ out: return r; } -void zd_op_stop(struct ieee80211_hw *hw) +void zd_op_stop(struct ieee80211_hw *hw, bool suspend) { struct zd_mac *mac = zd_hw_mac(hw); struct zd_chip *chip = &mac->chip; diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_mac.h b/drivers/net/wireless/zydas/zd1211rw/zd_mac.h index 5ff84bdc5a4c..053748a474ec 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_mac.h +++ b/drivers/net/wireless/zydas/zd1211rw/zd_mac.h @@ -303,7 +303,7 @@ void zd_mac_tx_failed(struct urb *urb); void zd_mac_tx_to_dev(struct sk_buff *skb, int error); int zd_op_start(struct ieee80211_hw *hw); -void zd_op_stop(struct ieee80211_hw *hw); +void zd_op_stop(struct ieee80211_hw *hw, bool suspend); int zd_restore_settings(struct zd_mac *mac); #ifdef DEBUG diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 2ee4218da1c5..a8a94edf2a70 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -1476,7 +1476,7 @@ static void zd_usb_stop(struct zd_usb *usb) { dev_dbg_f(zd_usb_dev(usb), "\n"); - zd_op_stop(zd_usb_to_hw(usb)); + zd_op_stop(zd_usb_to_hw(usb), false); zd_usb_disable_tx(usb); zd_usb_disable_rx(usb); diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 283804b49e91..3ff8103366c1 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1339,7 +1339,7 @@ err_free_rings: return ret; } -static void vnt_stop(struct ieee80211_hw *hw) +static void vnt_stop(struct ieee80211_hw *hw, bool suspend) { struct vnt_private *priv = hw->priv; diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 7bbed462f062..4f09e733e7a8 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -613,7 +613,7 @@ err: return ret; } -static void vnt_stop(struct ieee80211_hw *hw) +static void vnt_stop(struct ieee80211_hw *hw, bool suspend) { struct vnt_private *priv = hw->priv; int i; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ecfa65ade226..9c96e8ae9ef7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4444,7 +4444,7 @@ struct ieee80211_ops { struct ieee80211_tx_control *control, struct sk_buff *skb); int (*start)(struct ieee80211_hw *hw); - void (*stop)(struct ieee80211_hw *hw); + void (*stop)(struct ieee80211_hw *hw, bool suspend); #ifdef CONFIG_PM int (*suspend)(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); int (*resume)(struct ieee80211_hw *hw); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index dce37ba8ebe3..bef43990f07a 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -33,7 +33,7 @@ int drv_start(struct ieee80211_local *local) return ret; } -void drv_stop(struct ieee80211_local *local) +void drv_stop(struct ieee80211_local *local, bool suspend) { might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); @@ -41,8 +41,8 @@ void drv_stop(struct ieee80211_local *local) if (WARN_ON(!local->started)) return; - trace_drv_stop(local); - local->ops->stop(&local->hw); + trace_drv_stop(local, suspend); + local->ops->stop(&local->hw, suspend); trace_drv_return_void(local); /* sync away all work on the tasklet before clearing started */ diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index d4e73d3630e0..d382d9729e85 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -88,7 +88,7 @@ static inline int drv_get_et_sset_count(struct ieee80211_sub_if_data *sdata, } int drv_start(struct ieee80211_local *local); -void drv_stop(struct ieee80211_local *local); +void drv_stop(struct ieee80211_local *local, bool suspend); #ifdef CONFIG_PM static inline int drv_suspend(struct ieee80211_local *local, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 501f02809135..6349552e62a8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2246,7 +2246,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, /* Suspend/resume and hw reconfiguration */ int ieee80211_reconfig(struct ieee80211_local *local); -void ieee80211_stop_device(struct ieee80211_local *local); +void ieee80211_stop_device(struct ieee80211_local *local, bool suspend); int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f06e165d6c7a..64106097949a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -698,7 +698,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do wiphy_delayed_work_flush(local->hw.wiphy, &local->scan_work); if (local->open_count == 0) { - ieee80211_stop_device(local); + ieee80211_stop_device(local, false); /* no reconfiguring after stop! */ return; @@ -1435,7 +1435,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) drv_remove_interface(local, sdata); err_stop: if (!local->open_count) - drv_stop(local); + drv_stop(local, false); err_del_bss: sdata->bss = NULL; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index c1fa26e09479..d823d58303e8 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Portions - * Copyright (C) 2020-2021, 2023 Intel Corporation + * Copyright (C) 2020-2021, 2023-2024 Intel Corporation */ #include #include @@ -171,7 +171,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(!list_empty(&local->chanctx_list)); /* stop hardware - this must stop RX */ - ieee80211_stop_device(local); + ieee80211_stop_device(local, true); suspend: local->suspended = true; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index b26aacfbc622..dc498cd8cd91 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -328,9 +328,18 @@ TRACE_EVENT(drv_set_wakeup, TP_printk(LOCAL_PR_FMT " enabled:%d", LOCAL_PR_ARG, __entry->enabled) ); -DEFINE_EVENT(local_only_evt, drv_stop, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +TRACE_EVENT(drv_stop, + TP_PROTO(struct ieee80211_local *local, bool suspend), + TP_ARGS(local, suspend), + TP_STRUCT__entry( + LOCAL_ENTRY + __field(bool, suspend) + ), + TP_fast_assign( + LOCAL_ASSIGN; + __entry->suspend = suspend; + ), + TP_printk(LOCAL_PR_FMT " suspend:%d", LOCAL_PR_ARG, __entry->suspend) ); DEFINE_EVENT(local_sdata_addr_evt, drv_add_interface, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b3b8873a107b..610f0a94796c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1565,7 +1565,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, return supp_rates; } -void ieee80211_stop_device(struct ieee80211_local *local) +void ieee80211_stop_device(struct ieee80211_local *local, bool suspend) { ieee80211_handle_queued_frames(local); @@ -1576,7 +1576,7 @@ void ieee80211_stop_device(struct ieee80211_local *local) flush_workqueue(local->workqueue); wiphy_work_flush(local->hw.wiphy, NULL); - drv_stop(local); + drv_stop(local, suspend); } static void ieee80211_flush_completed_scan(struct ieee80211_local *local, -- cgit v1.2.3 From f531d13bdfe3f4f084aaa8acae2cb0f02295f5ae Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 27 May 2024 20:29:14 -0700 Subject: xfrm: support sending NAT keepalives in ESP in UDP states Add the ability to send out RFC-3948 NAT keepalives from the xfrm stack. To use, Userspace sets an XFRM_NAT_KEEPALIVE_INTERVAL integer property when creating XFRM outbound states which denotes the number of seconds between keepalive messages. Keepalive messages are sent from a per net delayed work which iterates over the xfrm states. The logic is guarded by the xfrm state spinlock due to the xfrm state walk iterator. Possible future enhancements: - Adding counters to keep track of sent keepalives. - deduplicate NAT keepalives between states sharing the same nat keepalive parameters. - provisioning hardware offloads for devices capable of implementing this. - revise xfrm state list to use an rcu list in order to avoid running this under spinlock. Suggested-by: Paul Wouters Tested-by: Paul Wouters Tested-by: Antony Antony Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 3 + include/net/netns/xfrm.h | 1 + include/net/xfrm.h | 10 ++ include/uapi/linux/xfrm.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/xfrm6_policy.c | 7 + net/xfrm/Makefile | 3 +- net/xfrm/xfrm_compat.c | 6 +- net/xfrm/xfrm_nat_keepalive.c | 292 ++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_policy.c | 8 ++ net/xfrm/xfrm_state.c | 17 +++ net/xfrm/xfrm_user.c | 15 +++ 12 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 net/xfrm/xfrm_nat_keepalive.c (limited to 'include') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 485c39a89866..11cefd50704d 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -9,6 +9,7 @@ #include #include #include +#include /* structs from net/ip6_fib.h */ struct fib6_info; @@ -72,6 +73,8 @@ struct ipv6_stub { int (*output)(struct net *, struct sock *, struct sk_buff *)); struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr, struct net_device *dev); + int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, + __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 423b52eca908..d489d9250bff 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -83,6 +83,7 @@ struct netns_xfrm { spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; + struct delayed_work nat_keepalive_work; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77ebf5bcf0b9..46a214a76081 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -229,6 +229,10 @@ struct xfrm_state { struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; + /* NAT keepalive */ + u32 nat_keepalive_interval; /* seconds */ + time64_t nat_keepalive_expiration; + /* Data for care-of address */ xfrm_address_t *coaddr; @@ -2203,4 +2207,10 @@ static inline int register_xfrm_state_bpf(void) } #endif +int xfrm_nat_keepalive_init(unsigned short family); +void xfrm_nat_keepalive_fini(unsigned short family); +int xfrm_nat_keepalive_net_init(struct net *net); +int xfrm_nat_keepalive_net_fini(struct net *net); +void xfrm_nat_keepalive_state_updated(struct xfrm_state *x); + #endif /* _NET_XFRM_H */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index d950d02ab791..f28701500714 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -321,6 +321,7 @@ enum xfrm_attr_type_t { XFRMA_IF_ID, /* __u32 */ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ XFRMA_SA_DIR, /* __u8 */ + XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8041dc181bd4..2b893858b9a9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1060,6 +1060,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, .ipv6_dev_find = ipv6_dev_find, + .ip6_xmit = ip6_xmit, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index cc885d3aa9e5..6837ff05f11a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -284,8 +284,14 @@ int __init xfrm6_init(void) ret = register_pernet_subsys(&xfrm6_net_ops); if (ret) goto out_protocol; + + ret = xfrm_nat_keepalive_init(AF_INET6); + if (ret) + goto out_nat_keepalive; out: return ret; +out_nat_keepalive: + unregister_pernet_subsys(&xfrm6_net_ops); out_protocol: xfrm6_protocol_fini(); out_state: @@ -297,6 +303,7 @@ out_policy: void xfrm6_fini(void) { + xfrm_nat_keepalive_fini(AF_INET6); unregister_pernet_subsys(&xfrm6_net_ops); xfrm6_protocol_fini(); xfrm6_policy_fini(); diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 547cec77ba03..512e0b2f8514 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -13,7 +13,8 @@ endif obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o xfrm_device.o + xfrm_sysctl.o xfrm_replay.o xfrm_device.o \ + xfrm_nat_keepalive.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 703d4172c7d7..91357ccaf4af 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -131,6 +131,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), + [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -280,9 +281,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_IF_ID: case XFRMA_MTIMER_THRESH: case XFRMA_SA_DIR: + case XFRMA_NAT_KEEPALIVE_INTERVAL: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -437,7 +439,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_DIR); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c new file mode 100644 index 000000000000..82f0a301683f --- /dev/null +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * xfrm_nat_keepalive.c + * + * (c) 2024 Eyal Birger + */ + +#include +#include +#include + +static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv4); +#if IS_ENABLED(CONFIG_IPV6) +static DEFINE_PER_CPU(struct sock *, nat_keepalive_sk_ipv6); +#endif + +struct nat_keepalive { + struct net *net; + u16 family; + xfrm_address_t saddr; + xfrm_address_t daddr; + __be16 encap_sport; + __be16 encap_dport; + __u32 smark; +}; + +static void nat_keepalive_init(struct nat_keepalive *ka, struct xfrm_state *x) +{ + ka->net = xs_net(x); + ka->family = x->props.family; + ka->saddr = x->props.saddr; + ka->daddr = x->id.daddr; + ka->encap_sport = x->encap->encap_sport; + ka->encap_dport = x->encap->encap_dport; + ka->smark = xfrm_smark_get(0, x); +} + +static int nat_keepalive_send_ipv4(struct sk_buff *skb, + struct nat_keepalive *ka) +{ + struct net *net = ka->net; + struct flowi4 fl4; + struct rtable *rt; + struct sock *sk; + __u8 tos = 0; + int err; + + flowi4_init_output(&fl4, 0 /* oif */, skb->mark, tos, + RT_SCOPE_UNIVERSE, IPPROTO_UDP, 0, + ka->daddr.a4, ka->saddr.a4, ka->encap_dport, + ka->encap_sport, sock_net_uid(net, NULL)); + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + skb_dst_set(skb, &rt->dst); + + sk = *this_cpu_ptr(&nat_keepalive_sk_ipv4); + sock_net_set(sk, net); + err = ip_build_and_send_pkt(skb, sk, fl4.saddr, fl4.daddr, NULL, tos); + sock_net_set(sk, &init_net); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int nat_keepalive_send_ipv6(struct sk_buff *skb, + struct nat_keepalive *ka, + struct udphdr *uh) +{ + struct net *net = ka->net; + struct dst_entry *dst; + struct flowi6 fl6; + struct sock *sk; + __wsum csum; + int err; + + csum = skb_checksum(skb, 0, skb->len, 0); + uh->check = csum_ipv6_magic(&ka->saddr.in6, &ka->daddr.in6, + skb->len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.saddr = ka->saddr.in6; + fl6.daddr = ka->daddr.in6; + fl6.flowi6_proto = IPPROTO_UDP; + fl6.fl6_sport = ka->encap_sport; + fl6.fl6_dport = ka->encap_dport; + + sk = *this_cpu_ptr(&nat_keepalive_sk_ipv6); + sock_net_set(sk, net); + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + skb_dst_set(skb, dst); + err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); + sock_net_set(sk, &init_net); + return err; +} +#endif + +static void nat_keepalive_send(struct nat_keepalive *ka) +{ + const int nat_ka_hdrs_len = max(sizeof(struct iphdr), + sizeof(struct ipv6hdr)) + + sizeof(struct udphdr); + const u8 nat_ka_payload = 0xFF; + int err = -EAFNOSUPPORT; + struct sk_buff *skb; + struct udphdr *uh; + + skb = alloc_skb(nat_ka_hdrs_len + sizeof(nat_ka_payload), GFP_ATOMIC); + if (unlikely(!skb)) + return; + + skb_reserve(skb, nat_ka_hdrs_len); + + skb_put_u8(skb, nat_ka_payload); + + uh = skb_push(skb, sizeof(*uh)); + uh->source = ka->encap_sport; + uh->dest = ka->encap_dport; + uh->len = htons(skb->len); + uh->check = 0; + + skb->mark = ka->smark; + + switch (ka->family) { + case AF_INET: + err = nat_keepalive_send_ipv4(skb, ka); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + err = nat_keepalive_send_ipv6(skb, ka, uh); + break; +#endif + } + if (err) + kfree_skb(skb); +} + +struct nat_keepalive_work_ctx { + time64_t next_run; + time64_t now; +}; + +static int nat_keepalive_work_single(struct xfrm_state *x, int count, void *ptr) +{ + struct nat_keepalive_work_ctx *ctx = ptr; + bool send_keepalive = false; + struct nat_keepalive ka; + time64_t next_run; + u32 interval; + int delta; + + interval = x->nat_keepalive_interval; + if (!interval) + return 0; + + spin_lock(&x->lock); + + delta = (int)(ctx->now - x->lastused); + if (delta < interval) { + x->nat_keepalive_expiration = ctx->now + interval - delta; + next_run = x->nat_keepalive_expiration; + } else if (x->nat_keepalive_expiration > ctx->now) { + next_run = x->nat_keepalive_expiration; + } else { + next_run = ctx->now + interval; + nat_keepalive_init(&ka, x); + send_keepalive = true; + } + + spin_unlock(&x->lock); + + if (send_keepalive) + nat_keepalive_send(&ka); + + if (!ctx->next_run || next_run < ctx->next_run) + ctx->next_run = next_run; + return 0; +} + +static void nat_keepalive_work(struct work_struct *work) +{ + struct nat_keepalive_work_ctx ctx; + struct xfrm_state_walk walk; + struct net *net; + + ctx.next_run = 0; + ctx.now = ktime_get_real_seconds(); + + net = container_of(work, struct net, xfrm.nat_keepalive_work.work); + xfrm_state_walk_init(&walk, IPPROTO_ESP, NULL); + xfrm_state_walk(net, &walk, nat_keepalive_work_single, &ctx); + xfrm_state_walk_done(&walk, net); + if (ctx.next_run) + schedule_delayed_work(&net->xfrm.nat_keepalive_work, + (ctx.next_run - ctx.now) * HZ); +} + +static int nat_keepalive_sk_init(struct sock * __percpu *socks, + unsigned short family) +{ + struct sock *sk; + int err, i; + + for_each_possible_cpu(i) { + err = inet_ctl_sock_create(&sk, family, SOCK_RAW, IPPROTO_UDP, + &init_net); + if (err < 0) + goto err; + + *per_cpu_ptr(socks, i) = sk; + } + + return 0; +err: + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); + return err; +} + +static void nat_keepalive_sk_fini(struct sock * __percpu *socks) +{ + int i; + + for_each_possible_cpu(i) + inet_ctl_sock_destroy(*per_cpu_ptr(socks, i)); +} + +void xfrm_nat_keepalive_state_updated(struct xfrm_state *x) +{ + struct net *net; + + if (!x->nat_keepalive_interval) + return; + + net = xs_net(x); + schedule_delayed_work(&net->xfrm.nat_keepalive_work, 0); +} + +int __net_init xfrm_nat_keepalive_net_init(struct net *net) +{ + INIT_DELAYED_WORK(&net->xfrm.nat_keepalive_work, nat_keepalive_work); + return 0; +} + +int xfrm_nat_keepalive_net_fini(struct net *net) +{ + cancel_delayed_work_sync(&net->xfrm.nat_keepalive_work); + return 0; +} + +int xfrm_nat_keepalive_init(unsigned short family) +{ + int err = -EAFNOSUPPORT; + + switch (family) { + case AF_INET: + err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv4, PF_INET); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + err = nat_keepalive_sk_init(&nat_keepalive_sk_ipv6, PF_INET6); + break; +#endif + } + + if (err) + pr_err("xfrm nat keepalive init: failed to init err:%d\n", err); + return err; +} +EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_init); + +void xfrm_nat_keepalive_fini(unsigned short family) +{ + switch (family) { + case AF_INET: + nat_keepalive_sk_fini(&nat_keepalive_sk_ipv4); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + nat_keepalive_sk_fini(&nat_keepalive_sk_ipv6); + break; +#endif + } +} +EXPORT_SYMBOL_GPL(xfrm_nat_keepalive_fini); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 475b904fe68b..6603d3bd171f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4289,8 +4289,14 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out_sysctl; + rv = xfrm_nat_keepalive_net_init(net); + if (rv < 0) + goto out_nat_keepalive; + return 0; +out_nat_keepalive: + xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -4303,6 +4309,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + xfrm_nat_keepalive_net_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -4364,6 +4371,7 @@ void __init xfrm_init(void) #endif register_xfrm_state_bpf(); + xfrm_nat_keepalive_init(AF_INET); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 649bb739df0d..abadc857cd45 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -715,6 +715,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->id.spi) hlist_del_rcu(&x->byspi); net->xfrm.state_num--; + xfrm_nat_keepalive_state_updated(x); spin_unlock(&net->xfrm.xfrm_state_lock); if (x->encap_sk) @@ -1453,6 +1454,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + xfrm_nat_keepalive_state_updated(x); } /* net->xfrm.xfrm_state_lock is held */ @@ -2871,6 +2873,21 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, goto error; } + if (x->nat_keepalive_interval) { + if (x->dir != XFRM_SA_DIR_OUT) { + NL_SET_ERR_MSG(extack, "NAT keepalive is only supported for outbound SAs"); + err = -EINVAL; + goto error; + } + + if (!x->encap || x->encap->encap_type != UDP_ENCAP_ESPINUDP) { + NL_SET_ERR_MSG(extack, + "NAT keepalive is only supported for UDP encapsulation"); + err = -EINVAL; + goto error; + } + } + error: return err; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e83c687bd64e..a552cfa623ea 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -833,6 +833,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_SA_DIR]) x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); + if (attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]) + x->nat_keepalive_interval = + nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]); + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; @@ -1288,6 +1292,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, } if (x->dir) ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); + + if (x->nat_keepalive_interval) { + ret = nla_put_u32(skb, XFRMA_NAT_KEEPALIVE_INTERVAL, + x->nat_keepalive_interval); + if (ret) + goto out; + } out: return ret; } @@ -3165,6 +3176,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), + [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); @@ -3474,6 +3486,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->dir) l += nla_total_size(sizeof(x->dir)); + if (x->nat_keepalive_interval) + l += nla_total_size(sizeof(x->nat_keepalive_interval)); + return l; } -- cgit v1.2.3 From 8c62617295d3c4cd03f1a02c3b9bf9d4e6d6e0c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Jun 2024 13:25:27 +0200 Subject: wifi: mac80211: remove DEAUTH_NEED_MGD_TX_PREP This flag is annoying because it puts a lot of logic into mac80211 that could just as well be in the driver (only iwlmvm uses it) and the implementation is also broken for MLO. Remove the flag in favour of calling drv_mgd_prepare_tx() without any conditions even for the deauth-while-assoc case. The drivers that implement it can take the appropriate actions, which for the only user of DEAUTH_NEED_MGD_TX_PREP (iwlmvm) is a bit more tricky than the implementation in mac80211 is anyway, and all others have no need and can just exit if info->was_assoc is set. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240627132527.94924bcc9c9e.I328a219e45f2e2724cd52e75bb9feee3bf21a463@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 12 +++++----- .../net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 2 ++ drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 4 ++++ .../net/wireless/intel/iwlwifi/mvm/time-event.c | 2 ++ include/net/mac80211.h | 22 +++++------------- net/mac80211/debugfs.c | 1 - net/mac80211/main.c | 3 --- net/mac80211/mlme.c | 26 +++------------------- 8 files changed, 23 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 90cf0ec69682..60bfe42d5386 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -383,12 +383,6 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (!mvm->mld_api_is_used) ieee80211_hw_set(hw, TIMING_BEACON_ONLY); - /* We should probably have this, but mac80211 - * currently doesn't support it for MLO. - */ - if (!(hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) - ieee80211_hw_set(hw, DEAUTH_NEED_MGD_TX_PREP); - /* * On older devices, enabling TX A-MSDU occasionally leads to * something getting messed up, the command read from the FIFO @@ -2853,6 +2847,8 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm, if (changes & BSS_CHANGED_ASSOC) { if (vif->cfg.assoc) { + mvmvif->session_prot_connection_loss = false; + /* clear statistics to get clean beacon counter */ iwl_mvm_request_statistics(mvm, true); for_each_mvm_vif_valid_link(mvmvif, i) @@ -4268,8 +4264,12 @@ void iwl_mvm_mac_mgd_prepare_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_prep_tx_info *info) { + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); + if (info->was_assoc && !mvmvif->session_prot_connection_loss) + return; + guard(mvm)(mvm); iwl_mvm_protect_assoc(mvm, vif, info->duration, info->link_id); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index ebf313e161f4..3c99396ad369 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -873,6 +873,8 @@ static void iwl_mvm_mld_vif_cfg_changed_station(struct iwl_mvm *mvm, if (changes & BSS_CHANGED_ASSOC) { if (vif->cfg.assoc) { + mvmvif->session_prot_connection_loss = false; + /* clear statistics to get clean beacon counter */ iwl_mvm_request_statistics(mvm, true); iwl_mvm_sf_update(mvm, vif, false); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 362973fdeac0..393ce424c196 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -450,6 +450,9 @@ struct iwl_mvm_esr_exit { * @unblock_esr_tpt_wk: work for unblocking EMLSR when tpt is high enough. * @roc_activity: currently running ROC activity for this vif (or * ROC_NUM_ACTIVITIES if no activity is running). + * @session_prot_connection_loss: the connection was lost due to session + * protection ending without receiving a beacon, so we need to now + * protect the deauth separately */ struct iwl_mvm_vif { struct iwl_mvm *mvm; @@ -463,6 +466,7 @@ struct iwl_mvm_vif { bool pm_enabled; bool monitor_active; bool esr_active; + bool session_prot_connection_loss; u8 low_latency: 6; u8 low_latency_actual: 1; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 2773dfa8baa9..77b0cae8566f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -222,6 +222,8 @@ static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm, iwl_dbg_tlv_time_point(&mvm->fwrt, IWL_FW_INI_TIME_POINT_ASSOC_FAILED, NULL); + + mvmvif->session_prot_connection_loss = true; } iwl_mvm_connection_loss(mvm, vif, errmsg); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9c96e8ae9ef7..bd0f8aefa797 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2767,14 +2767,6 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on * TDLS links. * - * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the - * mgd_prepare_tx() callback to be called before transmission of a - * deauthentication frame in case the association was completed but no - * beacon was heard. This is required in multi-channel scenarios, where the - * virtual interface might not be given air time for the transmission of - * the frame, as it is not synced with the AP/P2P GO yet, and thus the - * deauthentication frame might not be transmitted. - * * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't * support QoS NDP for AP probing - that's most likely a driver bug. * @@ -2874,7 +2866,6 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, - IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, IEEE80211_HW_BUFF_MMPDU_TXQ, IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, @@ -3787,13 +3778,15 @@ enum ieee80211_reconfig_type { * @success: whether the frame exchange was successful, only * used with the mgd_complete_tx() method, and then only * valid for auth and (re)assoc. + * @was_assoc: set if this call is due to deauth/disassoc + * while just having been associated * @link_id: the link id on which the frame will be TX'ed. * Only used with the mgd_prepare_tx() method. */ struct ieee80211_prep_tx_info { u16 duration; u16 subtype; - u8 success:1; + u8 success:1, was_assoc:1; int link_id; }; @@ -4242,12 +4235,9 @@ struct ieee80211_prep_tx_info { * yet it need not necessarily be given airtime, in particular since any * transmission to a P2P GO needs to be synchronized against the GO's * powersave state. mac80211 will call this function before transmitting a - * management frame prior to having successfully associated to allow the - * driver to give it channel time for the transmission, to get a response - * and to be able to synchronize with the GO. - * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211 - * would also call this function before transmitting a deauthentication - * frame in case that no beacon was heard from the AP/P2P GO. + * management frame prior to transmitting that frame to allow the driver + * to give it channel time for the transmission, to get a response and be + * able to synchronize with the GO. * The callback will be called before each transmission and upon return * mac80211 will transmit the frame right away. * Additional information is passed in the &struct ieee80211_prep_tx_info diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 98310188f330..02b5476a4376 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -483,7 +483,6 @@ static const char *hw_flag_names[] = { FLAG(REPORTS_LOW_ACK), FLAG(SUPPORTS_TX_FRAG), FLAG(SUPPORTS_TDLS_BUFFER_STA), - FLAG(DEAUTH_NEED_MGD_TX_PREP), FLAG(DOESNT_SUPPORT_QOS_NDP), FLAG(BUFF_MMPDU_TXQ), FLAG(SUPPORTS_VHT_EXT_NSS_BW), diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a9aefc83d30a..7578ea56c12f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1161,9 +1161,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (WARN_ON(!ieee80211_hw_check(hw, AP_LINK_PS))) return -EINVAL; - - if (WARN_ON(ieee80211_hw_check(hw, DEAUTH_NEED_MGD_TX_PREP))) - return -EINVAL; } #ifdef CONFIG_PM diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 80a10cd49565..4779a18ab75d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3521,6 +3521,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, u64 changed = 0; struct ieee80211_prep_tx_info info = { .subtype = stype, + .was_assoc = true, + .link_id = ffs(sdata->vif.active_links) - 1, }; lockdep_assert_wiphy(local->hw.wiphy); @@ -3569,29 +3571,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* deauthenticate/disassociate now */ if (tx || frame_buf) { - /* - * In multi channel scenarios guarantee that the virtual - * interface is granted immediate airtime to transmit the - * deauthentication frame by calling mgd_prepare_tx, if the - * driver requested so. - */ - if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP)) { - for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); - link_id++) { - struct ieee80211_link_data *link; - - link = sdata_dereference(sdata->link[link_id], - sdata); - if (!link) - continue; - if (link->u.mgd.have_beacon) - break; - } - if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) { - info.link_id = ffs(sdata->vif.active_links) - 1; - drv_mgd_prepare_tx(sdata->local, sdata, &info); - } - } + drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr, sdata->vif.cfg.ap_addr, stype, -- cgit v1.2.3 From 69540b7987ef05d1dff36981d4cc7c31e9716b36 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 27 Jun 2024 17:08:48 +0300 Subject: ethtool: Add ethtool operation to write to a transceiver module EEPROM Ethtool can already retrieve information from a transceiver module EEPROM by invoking the ethtool_ops::get_module_eeprom_by_page operation. Add a corresponding operation that allows ethtool to write to a transceiver module EEPROM. The new write operation is purely an in-kernel API and is not exposed to user space. The purpose of this operation is not to enable arbitrary read / write access, but to allow the kernel to write to specific addresses as part of transceiver module firmware flashing. In the future, more functionality can be implemented on top of these read / write operations. Adjust the comments of the 'ethtool_module_eeprom' structure as it is no longer used only for read access. Signed-off-by: Ido Schimmel Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/ethtool.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 959196af7f5a..c7f6f2bc9cac 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -506,17 +506,16 @@ struct ethtool_ts_stats { #define ETH_MODULE_MAX_I2C_ADDRESS 0x7f /** - * struct ethtool_module_eeprom - EEPROM dump from specified page - * @offset: Offset within the specified EEPROM page to begin read, in bytes. - * @length: Number of bytes to read. - * @page: Page number to read from. - * @bank: Page bank number to read from, if applicable by EEPROM spec. + * struct ethtool_module_eeprom - plug-in module EEPROM read / write parameters + * @offset: When @offset is 0-127, it is used as an address to the Lower Memory + * (@page must be 0). Otherwise, it is used as an address to the + * Upper Memory. + * @length: Number of bytes to read / write. + * @page: Page number. + * @bank: Bank number, if supported by EEPROM spec. * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most * EEPROMs use 0x50 or 0x51. * @data: Pointer to buffer with EEPROM data of @length size. - * - * This can be used to manage pages during EEPROM dump in ethtool and pass - * required information to the driver. */ struct ethtool_module_eeprom { u32 offset; @@ -824,6 +823,8 @@ struct ethtool_rxfh_param { * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from * specified page. Returns a negative error code or the amount of bytes * read. + * @set_module_eeprom_by_page: Write to a region of plug-in module EEPROM, + * from kernel space only. Returns a negative error code or zero. * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics. * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics. * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics. @@ -958,6 +959,9 @@ struct ethtool_ops { int (*get_module_eeprom_by_page)(struct net_device *dev, const struct ethtool_module_eeprom *page, struct netlink_ext_ack *extack); + int (*set_module_eeprom_by_page)(struct net_device *dev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); void (*get_eth_phy_stats)(struct net_device *dev, struct ethtool_eth_phy_stats *phy_stats); void (*get_eth_mac_stats)(struct net_device *dev, -- cgit v1.2.3 From 46fb3ba95b93d1887e6dfa02a535e0526062de95 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:50 +0300 Subject: ethtool: Add an interface for flashing transceiver modules' firmware CMIS compliant modules such as QSFP-DD might be running a firmware that can be updated in a vendor-neutral way by exchanging messages between the host and the module as described in section 7.3.1 of revision 5.2 of the CMIS standard. Add a pair of new ethtool messages that allow: * User space to trigger firmware update of transceiver modules * The kernel to notify user space about the progress of the process The user interface is designed to be asynchronous in order to avoid RTNL being held for too long and to allow several modules to be updated simultaneously. The interface is designed with CMIS compliant modules in mind, but kept generic enough to accommodate future use cases, if these arise. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 55 ++++++++++++++++++++++ Documentation/networking/ethtool-netlink.rst | 70 ++++++++++++++++++++++++++++ include/uapi/linux/ethtool.h | 18 +++++++ include/uapi/linux/ethtool_netlink.h | 19 ++++++++ tools/net/ynl/Makefile.deps | 3 +- 5 files changed, 164 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 683f5c3f30ad..949e2722505d 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -20,6 +20,10 @@ definitions: name: header-flags type: flags entries: [ compact-bitsets, omit-reply, stats ] + - + name: module-fw-flash-status + type: enum + entries: [ started, in_progress, completed, error ] attribute-sets: - @@ -1004,6 +1008,32 @@ attribute-sets: - name: burst-tmr type: u32 + - + name: module-fw-flash + attributes: + - + name: header + type: nest + nested-attributes: header + - + name: file-name + type: string + - + name: password + type: u32 + - + name: status + type: u32 + enum: module-fw-flash-status + - + name: status-msg + type: string + - + name: done + type: uint + - + name: total + type: uint operations: enum-model: directional @@ -1764,3 +1794,28 @@ operations: name: mm-ntf doc: Notification for change in MAC Merge configuration. notify: mm-get + - + name: module-fw-flash-act + doc: Flash transceiver module firmware. + + attribute-set: module-fw-flash + + do: + request: + attributes: + - header + - file-name + - password + - + name: module-fw-flash-ntf + doc: Notification for firmware flashing progress and status. + + attribute-set: module-fw-flash + + event: + attributes: + - header + - status + - status-msg + - done + - total diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 7ec08e903bab..bfe2eda8580d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -228,6 +228,7 @@ Userspace to kernel: ``ETHTOOL_MSG_PLCA_GET_STATUS`` get PLCA RS status ``ETHTOOL_MSG_MM_GET`` get MAC merge layer state ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters + ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` flash transceiver module firmware ===================================== ================================= Kernel to userspace: @@ -274,6 +275,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PLCA_GET_STATUS_REPLY`` PLCA RS status ``ETHTOOL_MSG_PLCA_NTF`` PLCA RS parameters ``ETHTOOL_MSG_MM_GET_REPLY`` MAC merge layer status + ``ETHTOOL_MSG_MODULE_FW_FLASH_NTF`` transceiver module flash updates ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -2041,6 +2043,73 @@ The attributes are propagated to the driver through the following structure: .. kernel-doc:: include/linux/ethtool.h :identifiers: ethtool_mm_cfg +MODULE_FW_FLASH_ACT +=================== + +Flashes transceiver module firmware. + +Request contents: + + ======================================= ====== =========================== + ``ETHTOOL_A_MODULE_FW_FLASH_HEADER`` nested request header + ``ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME`` string firmware image file name + ``ETHTOOL_A_MODULE_FW_FLASH_PASSWORD`` u32 transceiver module password + ======================================= ====== =========================== + +The firmware update process consists of three logical steps: + +1. Downloading a firmware image to the transceiver module and validating it. +2. Running the firmware image. +3. Committing the firmware image so that it is run upon reset. + +When flash command is given, those three steps are taken in that order. + +This message merely schedules the update process and returns immediately +without blocking. The process then runs asynchronously. +Since it can take several minutes to complete, during the update process +notifications are emitted from the kernel to user space updating it about +the status and progress. + +The ``ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME`` attribute encodes the firmware +image file name. The firmware image is downloaded to the transceiver module, +validated, run and committed. + +The optional ``ETHTOOL_A_MODULE_FW_FLASH_PASSWORD`` attribute encodes a password +that might be required as part of the transceiver module firmware update +process. + +The firmware update process can take several minutes to complete. Therefore, +during the update process notifications are emitted from the kernel to user +space updating it about the status and progress. + + + +Notification contents: + + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_HEADER`` | nested | reply header | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_STATUS`` | u32 | status | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG`` | string | status message | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_DONE`` | uint | progress | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_TOTAL`` | uint | total | + +---------------------------------------------------+--------+----------------+ + +The ``ETHTOOL_A_MODULE_FW_FLASH_STATUS`` attribute encodes the current status +of the firmware update process. Possible values are: + +.. kernel-doc:: include/uapi/linux/ethtool.h + :identifiers: ethtool_module_fw_flash_status + +The ``ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG`` attribute encodes a status message +string. + +The ``ETHTOOL_A_MODULE_FW_FLASH_DONE`` and ``ETHTOOL_A_MODULE_FW_FLASH_TOTAL`` +attributes encode the completed and total amount of work, respectively. + Request translation =================== @@ -2147,4 +2216,5 @@ are netlink only. n/a ``ETHTOOL_MSG_PLCA_GET_STATUS`` n/a ``ETHTOOL_MSG_MM_GET`` n/a ``ETHTOOL_MSG_MM_SET`` + n/a ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8733a3117902..e011384c915c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -877,6 +877,24 @@ enum ethtool_mm_verify_status { ETHTOOL_MM_VERIFY_STATUS_DISABLED, }; +/** + * enum ethtool_module_fw_flash_status - plug-in module firmware flashing status + * @ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED: The firmware flashing process has + * started. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS: The firmware flashing process + * is in progress. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED: The firmware flashing process was + * completed successfully. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR: The firmware flashing process was + * stopped due to an error. + */ +enum ethtool_module_fw_flash_status { + ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED = 1, + ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS, + ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED, + ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d15856c7e001..840dabdc9d88 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_MODULE_FW_FLASH_ACT, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,7 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -1018,6 +1020,23 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +/* MODULE_FW_FLASH */ + +enum { + ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, + ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_FW_FLASH_CNT, + ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index f4e8eb79c1b8..dbdca32a1c61 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -16,7 +16,8 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) -CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) +CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) -- cgit v1.2.3 From 31e0aa99dc02b2b038a270b0670fc8201b69ec8a Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:52 +0300 Subject: ethtool: Veto some operations during firmware flashing process Some operations cannot be performed during the firmware flashing process. For example: - Port must be down during the whole flashing process to avoid packet loss while committing reset for example. - Writing to EEPROM interrupts the flashing process, so operations like ethtool dump, module reset, get and set power mode should be vetoed. - Split port firmware flashing should be vetoed. In order to veto those scenarios, add a flag in 'struct net_device' that indicates when a firmware flash is taking place on the module and use it to prevent interruptions during the process. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- net/ethtool/eeprom.c | 6 ++++++ net/ethtool/ioctl.c | 12 ++++++++++++ net/ethtool/netlink.c | 12 ++++++++++++ 4 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc18acd3c58b..1e3401093c13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1990,6 +1990,8 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @module_fw_flash_in_progress: Module firmware flashing is in progress. + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2374,7 +2376,7 @@ struct net_device { bool proto_down; bool threaded; unsigned wol_enabled:1; - + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 6209c3a9c8f7..f36811b3ecf1 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -91,6 +91,12 @@ static int get_module_eeprom_by_page(struct net_device *dev, { const struct ethtool_ops *ops = dev->ethtool_ops; + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + if (dev->sfp_bus) return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e645d751a5e8..1cca372c0d80 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -658,6 +658,9 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + memset(&link_ksettings, 0, sizeof(link_ksettings)); err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); if (err < 0) @@ -1449,6 +1452,9 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) if (!dev->ethtool_ops->reset) return -EOPNOTSUPP; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (copy_from_user(&reset, useraddr, sizeof(reset))) return -EFAULT; @@ -2462,6 +2468,9 @@ int ethtool_get_module_info_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (dev->sfp_bus) return sfp_get_module_info(dev->sfp_bus, modinfo); @@ -2499,6 +2508,9 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (dev->sfp_bus) return sfp_get_module_eeprom(dev->sfp_bus, ee, data); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 393ce668fb04..a5907bbde427 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -765,10 +765,22 @@ static void ethnl_notify_features(struct netdev_notifier_info *info) static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *dev; + + dev = netdev_notifier_info_to_dev(info); + extack = netdev_notifier_info_to_extack(info); + switch (event) { case NETDEV_FEAT_CHANGE: ethnl_notify_features(ptr); break; + case NETDEV_PRE_UP: + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, "Can't set port up while flashing module firmware"); + return NOTIFY_BAD; + } } return NOTIFY_DONE; -- cgit v1.2.3 From e4f91936993ce4d0954688ec3cb80b3471b9eda5 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:53 +0300 Subject: net: sfp: Add more extended compliance codes SFF-8024 is used to define various constants re-used in several SFF SFP-related specifications. Add SFF-8024 extended compliance code definitions for CMIS compliant modules and use them in the next patch to determine the firmware flashing work. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/sfp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index a45da7eef9a2..b14be59550e3 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -284,6 +284,12 @@ enum { SFF8024_ID_QSFP_8438 = 0x0c, SFF8024_ID_QSFP_8436_8636 = 0x0d, SFF8024_ID_QSFP28_8636 = 0x11, + SFF8024_ID_QSFP_DD = 0x18, + SFF8024_ID_OSFP = 0x19, + SFF8024_ID_DSFP = 0x1B, + SFF8024_ID_QSFP_PLUS_CMIS = 0x1E, + SFF8024_ID_SFP_DD_CMIS = 0x1F, + SFF8024_ID_SFP_PLUS_CMIS = 0x20, SFF8024_ENCODING_UNSPEC = 0x00, SFF8024_ENCODING_8B10B = 0x01, -- cgit v1.2.3 From 3ebbd9f6de7ec6d538639ebb657246f629ace81e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:46 +0100 Subject: net: move ethtool-related netdev state into its own struct net_dev->ethtool is a pointer to new struct ethtool_netdev_state, which currently contains only the wol_enabled field. Suggested-by: Jakub Kicinski Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/293a562278371de7534ed1eb17531838ca090633.1719502239.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c | 4 ++-- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 2 +- drivers/net/phy/phy.c | 2 +- drivers/net/phy/phy_device.c | 5 +++-- drivers/net/phy/phylink.c | 2 +- include/linux/ethtool.h | 8 ++++++++ include/linux/netdevice.h | 8 +++++--- net/core/dev.c | 4 ++++ net/ethtool/ioctl.c | 2 +- net/ethtool/wol.c | 2 +- 11 files changed, 29 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 9246ea2118ff..714d2e804694 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1608,7 +1608,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, !wolopts); - tp->dev->wol_enabled = wolopts ? 1 : 0; + tp->dev->ethtool->wol_enabled = wolopts ? 1 : 0; } } @@ -5478,7 +5478,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_set_d3_pll_down(tp, true); } else { rtl_set_d3_pll_down(tp, false); - dev->wol_enabled = 1; + dev->ethtool->wol_enabled = 1; } jumbo_max = rtl_jumbo_max(tp); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 46a5a3e95202..e868f7ef4920 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -37,9 +37,9 @@ static int ngbe_set_wol(struct net_device *netdev, wx->wol = 0; if (wol->wolopts & WAKE_MAGIC) wx->wol = WX_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, WX_PSR_WKUP_CTL, wx->wol); - device_set_wakeup_enable(&pdev->dev, netdev->wol_enabled); + device_set_wakeup_enable(&pdev->dev, netdev->ethtool->wol_enabled); return 0; } diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e894e01d030d..a8119de60deb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -650,7 +650,7 @@ static int ngbe_probe(struct pci_dev *pdev, if (wx->wol_hw_supported) wx->wol = NGBE_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol); device_set_wakeup_enable(&pdev->dev, wx->wol); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c4236564c1cd..785182fa5fe0 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1309,7 +1309,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (netdev) { struct device *parent = netdev->dev.parent; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) pm_system_wakeup(); else if (device_may_wakeup(&netdev->dev)) pm_wakeup_dev_event(&netdev->dev, 0, true); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6c6ec9475709..473cbc1d497b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -296,7 +296,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) goto out; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) return false; /* As long as not all affected network drivers support the @@ -1984,7 +1984,8 @@ int phy_suspend(struct phy_device *phydev) return 0; phy_ethtool_get_wol(phydev, &wol); - phydev->wol_enabled = wol.wolopts || (netdev && netdev->wol_enabled); + phydev->wol_enabled = wol.wolopts || + (netdev && netdev->ethtool->wol_enabled); /* If the device has WOL enabled, we cannot suspend the PHY */ if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND)) return -EBUSY; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6c24c48dcf0f..51c526d227fa 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2282,7 +2282,7 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) { ASSERT_RTNL(); - if (mac_wol && (!pl->netdev || pl->netdev->wol_enabled)) { + if (mac_wol && (!pl->netdev || pl->netdev->ethtool->wol_enabled)) { /* Wake-on-Lan enabled, MAC handling */ mutex_lock(&pl->state_mutex); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c7f6f2bc9cac..374639e661d1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1004,6 +1004,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd, u32 *dev_speed, u8 *dev_duplex); +/** + * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @wol_enabled: Wake-on-LAN is enabled + */ +struct ethtool_netdev_state { + unsigned wol_enabled:1; +}; + struct phy_device; struct phy_tdr_config; struct phy_plca_cfg; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1e3401093c13..3c719f0d5f5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -80,6 +80,7 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +struct ethtool_netdev_state; typedef u32 xdp_features_t; @@ -1986,8 +1987,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @wol_enabled: Wake-on-LAN is enabled - * * @threaded: napi threaded mode is enabled * * @module_fw_flash_in_progress: Module firmware flashing is in progress. @@ -2001,6 +2000,7 @@ enum netdev_reg_state { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @ethtool: ethtool related state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of @@ -2375,7 +2375,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; - unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; @@ -2386,6 +2386,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + struct ethtool_netdev_state *ethtool; + /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; diff --git a/net/core/dev.c b/net/core/dev.c index 0a23d7da7fbc..c0c6e200e17f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11116,6 +11116,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_all; + dev->ethtool = kzalloc(sizeof(*dev->ethtool), GFP_KERNEL_ACCOUNT); + if (!dev->ethtool) + goto free_all; strcpy(dev->name, name); dev->name_assign_type = name_assign_type; @@ -11166,6 +11169,7 @@ void free_netdev(struct net_device *dev) return; } + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 1cca372c0d80..94059ce9e5f2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1509,7 +1509,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL); return 0; diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index 0ed56c9ac1bc..a39d8000d808 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -137,7 +137,7 @@ ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_wol(dev, &wol); if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; return 1; } -- cgit v1.2.3 From 6ad2962f8adfd53fca52dce7f830783e95d99ce7 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:47 +0100 Subject: net: ethtool: attach an XArray of custom RSS contexts to a netdevice Each context stores the RXFH settings (indir, key, and hfunc) as well as optionally some driver private data. Delete any still-existing contexts at netdev unregister time. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/cbd1c402cec38f2e03124f2ab65b4ae4e08bd90d.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 42 ++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 27 +++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 374639e661d1..c741d6403364 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -159,6 +159,46 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) return index % n_rx_rings; } +/** + * struct ethtool_rxfh_context - a custom RSS context configuration + * @indir_size: Number of u32 entries in indirection table + * @key_size: Size of hash key, in bytes + * @priv_size: Size of driver private data, in bytes + * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_* + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. + * @indir_configured: indir has been specified (at create time or subsequently) + * @key_configured: hkey has been specified (at create time or subsequently) + */ +struct ethtool_rxfh_context { + u32 indir_size; + u32 key_size; + u16 priv_size; + u8 hfunc; + u8 input_xfrm; + u8 indir_configured:1; + u8 key_configured:1; + /* private: driver private data, indirection table, and hash key are + * stored sequentially in @data area. Use below helpers to access. + */ + u8 data[] __aligned(sizeof(void *)); +}; + +static inline void *ethtool_rxfh_context_priv(struct ethtool_rxfh_context *ctx) +{ + return ctx->data; +} + +static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) +{ + return (u32 *)(ctx->data + ALIGN(ctx->priv_size, sizeof(u32))); +} + +static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) +{ + return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -1006,9 +1046,11 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, /** * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @rss_ctx: XArray of custom RSS contexts * @wol_enabled: Wake-on-LAN is enabled */ struct ethtool_netdev_state { + struct xarray rss_ctx; unsigned wol_enabled:1; }; diff --git a/net/core/dev.c b/net/core/dev.c index c0c6e200e17f..bce810f8ae53 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10336,6 +10336,9 @@ int register_netdevice(struct net_device *dev) if (ret) return ret; + /* rss ctx ID 0 is reserved for the default context, start from 1 */ + xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); + spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -11235,6 +11238,28 @@ void synchronize_net(void) } EXPORT_SYMBOL(synchronize_net); +static void netdev_rss_contexts_free(struct net_device *dev) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + struct ethtool_rxfh_param rxfh; + + rxfh.indir = ethtool_rxfh_context_indir(ctx); + rxfh.key = ethtool_rxfh_context_key(ctx); + rxfh.hfunc = ctx->hfunc; + rxfh.input_xfrm = ctx->input_xfrm; + rxfh.rss_context = context; + rxfh.rss_delete = true; + + xa_erase(&dev->ethtool->rss_ctx, context); + dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + kfree(ctx); + } + xa_destroy(&dev->ethtool->rss_ctx); +} + /** * unregister_netdevice_queue - remove device from the kernel * @dev: device @@ -11338,6 +11363,8 @@ void unregister_netdevice_many_notify(struct list_head *head, netdev_name_node_alt_flush(dev); netdev_name_node_free(dev->name_node); + netdev_rss_contexts_free(dev); + call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); if (dev->netdev_ops->ndo_uninit) -- cgit v1.2.3 From eac9122f0c41b832065e01977c34946ec8e76c24 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:48 +0100 Subject: net: ethtool: record custom RSS contexts in the XArray Since drivers are still choosing the context IDs, we have to force the XArray to use the ID they've chosen rather than picking one ourselves, and handle the case where they give us an ID that's already in use. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/801f5faa4cec87c65b2c6e27fb220c944bce593a.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 14 ++++++++++ net/ethtool/ioctl.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c741d6403364..43a2a143034f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -199,6 +199,17 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); } +static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, + u16 priv_size) +{ + size_t indir_bytes = array_size(indir_size, sizeof(u32)); + size_t flex_len; + + flex_len = size_add(size_add(indir_bytes, key_size), + ALIGN(priv_size, sizeof(u32))); + return struct_size_t(struct ethtool_rxfh_context, data, flex_len); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -710,6 +721,8 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_priv_size: size of the driver private data area the core should + * allocate for an RSS context (in &struct ethtool_rxfh_context). * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -895,6 +908,7 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u16 rxfh_priv_size; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 94059ce9e5f2..e901a20e97f5 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1281,10 +1281,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; struct ethtool_rxfh_param rxfh_dev = {}; + struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; u32 indir_bytes = 0; + bool create = false; u8 *rss_config; int ret; @@ -1312,6 +1314,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; + create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key, function @@ -1377,13 +1380,42 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (create) { + if (rxfh_dev.rss_delete) { + ret = -EINVAL; + goto out; + } + ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, + dev_key_size, + ops->rxfh_priv_size), + GFP_KERNEL_ACCOUNT); + if (!ctx) { + ret = -ENOMEM; + goto out; + } + ctx->indir_size = dev_indir_size; + ctx->key_size = dev_key_size; + ctx->hfunc = rxfh.hfunc; + ctx->input_xfrm = rxfh.input_xfrm; + ctx->priv_size = ops->rxfh_priv_size; + } else if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + } rxfh_dev.hfunc = rxfh.hfunc; rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; ret = ops->set_rxfh(dev, &rxfh_dev, extack); - if (ret) + if (ret) { + if (create) + /* failed to create, free our new tracking entry */ + kfree(ctx); goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) @@ -1396,6 +1428,46 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) dev->priv_flags |= IFF_RXFH_CONFIGURED; } + /* Update rss_ctx tracking */ + if (create) { + /* Ideally this should happen before calling the driver, + * so that we can fail more cleanly; but we don't have the + * context ID until the driver picks it, so we have to + * wait until after. + */ + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { + /* context ID reused, our tracking is screwed */ + kfree(ctx); + goto out; + } + /* Allocate the exact ID the driver gave us */ + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, + ctx, GFP_KERNEL))) { + kfree(ctx); + goto out; + } + ctx->indir_configured = rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; + ctx->key_configured = !!rxfh.key_size; + } + if (rxfh_dev.rss_delete) { + WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); + kfree(ctx); + } else if (ctx) { + if (rxfh_dev.indir) { + for (i = 0; i < dev_indir_size; i++) + ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; + ctx->indir_configured = 1; + } + if (rxfh_dev.key) { + memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, + dev_key_size); + ctx->key_configured = 1; + } + if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) + ctx->hfunc = rxfh_dev.hfunc; + if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE) + ctx->input_xfrm = rxfh_dev.input_xfrm; + } out: kfree(rss_config); -- cgit v1.2.3 From 847a8ab186767be6ee95643f9739fa9d0f839589 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:49 +0100 Subject: net: ethtool: let the core choose RSS context IDs Add a new API to create/modify/remove RSS contexts, that passes in the newly-chosen context ID (not as a pointer) rather than leaving the driver to choose it on create. Also pass in the ctx, allowing drivers to easily use its private data area to store their hardware-specific state. Keep the existing .set_rxfh API for now as a fallback, but deprecate it for custom contexts (rss_context != 0). Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/45f1fe61df2163c091ec394c9f52000c8b16cc3b.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 40 ++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 6 +++++- net/ethtool/ioctl.c | 46 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 79 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 43a2a143034f..4292a25b2427 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -723,6 +723,10 @@ struct ethtool_rxfh_param { * RSS. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). + * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this + * is zero then the core may choose any (nonzero) ID, otherwise the core + * will only use IDs strictly less than this value, as the @rss_context + * argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -819,6 +823,32 @@ struct ethtool_rxfh_param { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. + * @create_rxfh_context: Create a new RSS context with the specified RX flow + * hash indirection table, hash key, and hash function. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that the indir table, hkey and hfunc are not yet populated as + * of this call. The driver does not need to update these; the core + * will do so if this op succeeds. + * However, if @rxfh.indir is set to %NULL, the driver must update the + * indir table in @ctx with the (default or inherited) table actually in + * use; similarly, if @rxfh.key is %NULL, @rxfh.hfunc is + * %ETH_RSS_HASH_NO_CHANGE, or @rxfh.input_xfrm is %RXH_XFRM_NO_CHANGE, + * the driver should update the corresponding information in @ctx. + * If the driver provides this method, it must also provide + * @modify_rxfh_context and @remove_rxfh_context. + * Returns a negative error code or zero. + * @modify_rxfh_context: Reconfigure the specified RSS context. Allows setting + * the contents of the RX flow hash indirection table, hash key, and/or + * hash function associated with the given context. + * Parameters which are set to %NULL or zero will remain unchanged. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that it will still contain the *old* settings. The driver does + * not need to update these; the core will do so if this op succeeds. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. + * @remove_rxfh_context: Remove the specified RSS context. + * The &struct ethtool_rxfh_context for this context is passed in @ctx. + * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -909,6 +939,7 @@ struct ethtool_ops { u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; u16 rxfh_priv_size; + u32 rxfh_max_context_id; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); @@ -971,6 +1002,15 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, struct netlink_ext_ack *extack); + int (*create_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh); + int (*modify_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh); + int (*remove_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + u32 rss_context); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/net/core/dev.c b/net/core/dev.c index bce810f8ae53..8481dc4f2196 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11254,7 +11254,11 @@ static void netdev_rss_contexts_free(struct net_device *dev) rxfh.rss_delete = true; xa_erase(&dev->ethtool->rss_ctx, context); - dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + if (dev->ethtool_ops->create_rxfh_context) + dev->ethtool_ops->remove_rxfh_context(dev, ctx, + context); + else + dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e901a20e97f5..46b50ddde013 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1395,9 +1395,24 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } ctx->indir_size = dev_indir_size; ctx->key_size = dev_key_size; - ctx->hfunc = rxfh.hfunc; - ctx->input_xfrm = rxfh.input_xfrm; ctx->priv_size = ops->rxfh_priv_size; + /* Initialise to an empty context */ + ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; + ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + if (ops->create_rxfh_context) { + u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 ctx_id; + + /* driver uses new API, core allocates ID */ + ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, + XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + if (ret < 0) { + kfree(ctx); + goto out; + } + WARN_ON(!ctx_id); /* can't happen */ + rxfh.rss_context = ctx_id; + } } else if (rxfh.rss_context) { ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); if (!ctx) { @@ -1409,11 +1424,24 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; - ret = ops->set_rxfh(dev, &rxfh_dev, extack); - if (ret) { + if (rxfh.rss_context && ops->create_rxfh_context) { if (create) + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev); + else if (rxfh_dev.rss_delete) + ret = ops->remove_rxfh_context(dev, ctx, + rxfh.rss_context); + else + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev); + } else { + ret = ops->set_rxfh(dev, &rxfh_dev, extack); + } + if (ret) { + if (create) { /* failed to create, free our new tracking entry */ + if (ops->create_rxfh_context) + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); kfree(ctx); + } goto out; } @@ -1429,12 +1457,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, dev->priv_flags |= IFF_RXFH_CONFIGURED; } /* Update rss_ctx tracking */ - if (create) { - /* Ideally this should happen before calling the driver, - * so that we can fail more cleanly; but we don't have the - * context ID until the driver picks it, so we have to - * wait until after. - */ + if (create && !ops->create_rxfh_context) { + /* driver uses old API, it chose context ID */ if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { /* context ID reused, our tracking is screwed */ kfree(ctx); @@ -1446,8 +1470,6 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, kfree(ctx); goto out; } - ctx->indir_configured = rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; - ctx->key_configured = !!rxfh.key_size; } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); -- cgit v1.2.3 From 30a32cdf6b130356805b3193a6208de25cbb2015 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:50 +0100 Subject: net: ethtool: add an extack parameter to new rxfh_context APIs Currently passed as NULL, but will allow drivers to report back errors when ethnl support for these ops is added. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/6e0012347d175fdd1280363d7bfa76a2f2777e17.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 9 ++++++--- net/core/dev.c | 2 +- net/ethtool/ioctl.c | 9 ++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4292a25b2427..9cdbc8e3ed5c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1004,13 +1004,16 @@ struct ethtool_ops { struct netlink_ext_ack *extack); int (*create_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - const struct ethtool_rxfh_param *rxfh); + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); int (*modify_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - const struct ethtool_rxfh_param *rxfh); + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); int (*remove_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - u32 rss_context); + u32 rss_context, + struct netlink_ext_ack *extack); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); diff --git a/net/core/dev.c b/net/core/dev.c index 8481dc4f2196..2daed4464c08 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11256,7 +11256,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) xa_erase(&dev->ethtool->rss_ctx, context); if (dev->ethtool_ops->create_rxfh_context) dev->ethtool_ops->remove_rxfh_context(dev, ctx, - context); + context, NULL); else dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); kfree(ctx); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 46b50ddde013..82c610e9e6b2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1426,12 +1426,15 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh.rss_context && ops->create_rxfh_context) { if (create) - ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev); + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, + extack); else if (rxfh_dev.rss_delete) ret = ops->remove_rxfh_context(dev, ctx, - rxfh.rss_context); + rxfh.rss_context, + extack); else - ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev); + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, + extack); } else { ret = ops->set_rxfh(dev, &rxfh_dev, extack); } -- cgit v1.2.3 From 87925151191b64d9623e63ccf11e517eacc99d7d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:51 +0100 Subject: net: ethtool: add a mutex protecting RSS contexts While this is not needed to serialise the ethtool entry points (which are all under RTNL), drivers may have cause to asynchronously access dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to do this safely without needing to take the RTNL. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/7f9c15eb7525bf87af62c275dde3a8570ee8bf0a.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 3 +++ net/core/dev.c | 5 +++++ net/ethtool/ioctl.c | 7 +++++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9cdbc8e3ed5c..f74bb0cf8ed1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1104,10 +1104,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, /** * struct ethtool_netdev_state - per-netdevice state for ethtool features * @rss_ctx: XArray of custom RSS contexts + * @rss_lock: Protects entries in @rss_ctx. May be taken from + * within RTNL. * @wol_enabled: Wake-on-LAN is enabled */ struct ethtool_netdev_state { struct xarray rss_ctx; + struct mutex rss_lock; unsigned wol_enabled:1; }; diff --git a/net/core/dev.c b/net/core/dev.c index 2daed4464c08..385c4091aa77 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10338,6 +10338,7 @@ int register_netdevice(struct net_device *dev) /* rss ctx ID 0 is reserved for the default context, start from 1 */ xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); + mutex_init(&dev->ethtool->rss_lock); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -11243,6 +11244,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) struct ethtool_rxfh_context *ctx; unsigned long context; + mutex_lock(&dev->ethtool->rss_lock); xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { struct ethtool_rxfh_param rxfh; @@ -11262,6 +11264,7 @@ static void netdev_rss_contexts_free(struct net_device *dev) kfree(ctx); } xa_destroy(&dev->ethtool->rss_ctx); + mutex_unlock(&dev->ethtool->rss_lock); } /** @@ -11374,6 +11377,8 @@ void unregister_netdevice_many_notify(struct list_head *head, if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + mutex_destroy(&dev->ethtool->rss_lock); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 82c610e9e6b2..939ccd106fe1 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1285,6 +1285,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; + bool locked = false; /* dev->ethtool->rss_lock taken */ u32 indir_bytes = 0; bool create = false; u8 *rss_config; @@ -1380,6 +1381,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (rxfh.rss_context) { + mutex_lock(&dev->ethtool->rss_lock); + locked = true; + } if (create) { if (rxfh_dev.rss_delete) { ret = -EINVAL; @@ -1495,6 +1500,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } out: + if (locked) + mutex_unlock(&dev->ethtool->rss_lock); kfree(rss_config); return ret; } -- cgit v1.2.3 From 7c8110057b1bb9ab9f47ac0efc554d5c3a53b693 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jun 2024 14:35:50 -0700 Subject: tcp_metrics: add UAPI to the header guard tcp_metrics' header lacks the customary _UAPI in the header guard. This makes YNL build rules work less seamlessly. We can easily fix that on YNL side, but this could also be problematic if we ever needed to create a kernel-only tcp_metrics.h. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/uapi/linux/tcp_metrics.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 7cb4a172feed..c48841076998 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* tcp_metrics.h - TCP Metrics Interface */ -#ifndef _LINUX_TCP_METRICS_H -#define _LINUX_TCP_METRICS_H +#ifndef _UAPI_LINUX_TCP_METRICS_H +#define _UAPI_LINUX_TCP_METRICS_H #include @@ -58,4 +58,4 @@ enum { #define TCP_METRICS_CMD_MAX (__TCP_METRICS_CMD_MAX - 1) -#endif /* _LINUX_TCP_METRICS_H */ +#endif /* _UAPI_LINUX_TCP_METRICS_H */ -- cgit v1.2.3 From 85674625e0bc25be4dbaa165a556ef6037328379 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jun 2024 14:35:51 -0700 Subject: tcp_metrics: add netlink protocol spec in YAML Add a protocol spec for tcp_metrics, so that it's accessible via YNL. Useful at the very least for testing fixes. In this episode of "10,000 ways to complicate netlink" the metric nest has defines which are off by 1. iproute2 does: struct rtattr *m[TCP_METRIC_MAX + 1 + 1]; parse_rtattr_nested(m, TCP_METRIC_MAX + 1, a); for (i = 0; i < TCP_METRIC_MAX + 1; i++) { // ... attr = m[i + 1]; This is too weird to support in YNL, add a new set of defines with _correct_ values to the official kernel header. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- Documentation/netlink/specs/tcp_metrics.yaml | 169 +++++++++++++++++++++++++++ include/uapi/linux/tcp_metrics.h | 16 +++ tools/net/ynl/Makefile.deps | 1 + 3 files changed, 186 insertions(+) create mode 100644 Documentation/netlink/specs/tcp_metrics.yaml (limited to 'include') diff --git a/Documentation/netlink/specs/tcp_metrics.yaml b/Documentation/netlink/specs/tcp_metrics.yaml new file mode 100644 index 000000000000..1bd94f43e526 --- /dev/null +++ b/Documentation/netlink/specs/tcp_metrics.yaml @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: tcp_metrics + +protocol: genetlink-legacy + +doc: | + Management interface for TCP metrics. + +c-family-name: tcp-metrics-genl-name +c-version-name: tcp-metrics-genl-version +max-by-define: true +kernel-policy: global + +definitions: + - + name: tcp-fastopen-cookie-max + type: const + value: 16 + +attribute-sets: + - + name: tcp-metrics + name-prefix: tcp-metrics-attr- + attributes: + - + name: addr-ipv4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: addr-ipv6 + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: age + type: u64 + - + name: tw-tsval + type: u32 + doc: unused + - + name: tw-ts-stamp + type: s32 + doc: unused + - + name: vals + type: nest + nested-attributes: metrics + - + name: fopen-mss + type: u16 + - + name: fopen-syn-drops + type: u16 + - + name: fopen-syn-drop-ts + type: u64 + - + name: fopen-cookie + type: binary + checks: + min-len: tcp-fastopen-cookie-max + - + name: saddr-ipv4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: saddr-ipv6 + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: pad + type: pad + + - + name: metrics + # Intentionally don't define the name-prefix, see below. + doc: | + Attributes with metrics. Note that the values here do not match + the TCP_METRIC_* defines in the kernel, because kernel defines + are off-by one (e.g. rtt is defined as enum 0, while netlink carries + attribute type 1). + attributes: + - + name: rtt + type: u32 + doc: | + Round Trip Time (RTT), in msecs with 3 bits fractional + (left-shift by 3 to get the msec value). + - + name: rttvar + type: u32 + doc: | + Round Trip Time VARiance (RTT), in msecs with 2 bits fractional + (left-shift by 2 to get the msec value). + - + name: ssthresh + type: u32 + doc: Slow Start THRESHold. + - + name: cwnd + type: u32 + doc: Congestion Window. + - + name: reodering + type: u32 + doc: Reodering metric. + - + name: rtt-us + type: u32 + doc: | + Round Trip Time (RTT), in usecs, with 3 bits fractional + (left-shift by 3 to get the msec value). + - + name: rttvar-us + type: u32 + doc: | + Round Trip Time (RTT), in usecs, with 2 bits fractional + (left-shift by 3 to get the msec value). + +operations: + list: + - + name: get + doc: Retrieve metrics. + attribute-set: tcp-metrics + + dont-validate: [ strict, dump ] + + do: + request: &sel_attrs + attributes: + - addr-ipv4 + - addr-ipv6 + - saddr-ipv4 + - saddr-ipv6 + reply: &all_attrs + attributes: + - addr-ipv4 + - addr-ipv6 + - saddr-ipv4 + - saddr-ipv6 + - age + - vals + - fopen-mss + - fopen-syn-drops + - fopen-syn-drop-ts + - fopen-cookie + dump: + reply: *all_attrs + + - + name: del + doc: Delete metrics. + attribute-set: tcp-metrics + + dont-validate: [ strict, dump ] + flags: [ admin-perm ] + + do: + request: *sel_attrs diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index c48841076998..927c735a5b0e 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -27,6 +27,22 @@ enum tcp_metric_index { #define TCP_METRIC_MAX (__TCP_METRIC_MAX - 1) +/* Re-define enum tcp_metric_index, again, using the values carried + * as netlink attribute types. + */ +enum { + TCP_METRICS_A_METRICS_RTT = 1, + TCP_METRICS_A_METRICS_RTTVAR, + TCP_METRICS_A_METRICS_SSTHRESH, + TCP_METRICS_A_METRICS_CWND, + TCP_METRICS_A_METRICS_REODERING, + TCP_METRICS_A_METRICS_RTT_US, + TCP_METRICS_A_METRICS_RTTVAR_US, + + __TCP_METRICS_A_METRICS_MAX +}; +#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1) + enum { TCP_METRICS_ATTR_UNSPEC, TCP_METRICS_ATTR_ADDR_IPV4, /* u32 */ diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index dbdca32a1c61..0712b5e82eb7 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -26,3 +26,4 @@ CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) -- cgit v1.2.3 From 89cc8f1c5f22568142b7ad118c738204708e4207 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jun 2024 00:26:48 +0200 Subject: netfilter: nf_tables: Add flowtable map for xdp offload This adds a small internal mapping table so that a new bpf (xdp) kfunc can perform lookups in a flowtable. As-is, xdp program has access to the device pointer, but no way to do a lookup in a flowtable -- there is no way to obtain the needed struct without questionable stunts. This allows to obtain an nf_flowtable pointer given a net_device structure. In order to keep backward compatibility, the infrastructure allows the user to add a given device to multiple flowtables, but it will always return the first added mapping performing the lookup since it assumes the right configuration is 1:1 mapping between flowtables and net_devices. Co-developed-by: Lorenzo Bianconi Signed-off-by: Florian Westphal Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/bpf/9f20e2c36f494b3bf177328718367f636bb0b2ab.1719698275.git.lorenzo@kernel.org --- include/net/netfilter/nf_flow_table.h | 5 ++ net/netfilter/Makefile | 2 +- net/netfilter/nf_flow_table_offload.c | 2 +- net/netfilter/nf_flow_table_xdp.c | 147 ++++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 2 deletions(-) create mode 100644 net/netfilter/nf_flow_table_xdp.c (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 9abb7ee40d72..d845745207d2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -305,6 +305,11 @@ struct flow_ports { __be16 source, dest; }; +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev); +int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd); + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 614815a3ed73..18046872a38a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -142,7 +142,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o # flow table infrastructure obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ - nf_flow_table_offload.o + nf_flow_table_offload.o nf_flow_table_xdp.o nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index a010b25076ca..ff1a4e36c2b5 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1192,7 +1192,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable, int err; if (!nf_flowtable_hw_offload(flowtable)) - return 0; + return nf_flow_offload_xdp_setup(flowtable, dev, cmd); if (dev->netdev_ops->ndo_setup_tc) err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, diff --git a/net/netfilter/nf_flow_table_xdp.c b/net/netfilter/nf_flow_table_xdp.c new file mode 100644 index 000000000000..e1252d042699 --- /dev/null +++ b/net/netfilter/nf_flow_table_xdp.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +struct flow_offload_xdp_ft { + struct list_head head; + struct nf_flowtable *ft; + struct rcu_head rcuhead; +}; + +struct flow_offload_xdp { + struct hlist_node hnode; + unsigned long net_device_addr; + struct list_head head; +}; + +#define NF_XDP_HT_BITS 4 +static DEFINE_HASHTABLE(nf_xdp_hashtable, NF_XDP_HT_BITS); +static DEFINE_MUTEX(nf_xdp_hashtable_lock); + +/* caller must hold rcu read lock */ +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev) +{ + unsigned long key = (unsigned long)dev; + struct flow_offload_xdp *iter; + + hash_for_each_possible_rcu(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + struct flow_offload_xdp_ft *ft_elem; + + /* The user is supposed to insert a given net_device + * just into a single nf_flowtable so we always return + * the first element here. + */ + ft_elem = list_first_or_null_rcu(&iter->head, + struct flow_offload_xdp_ft, + head); + return ft_elem ? ft_elem->ft : NULL; + } + } + + return NULL; +} + +static int nf_flowtable_by_dev_insert(struct nf_flowtable *ft, + const struct net_device *dev) +{ + struct flow_offload_xdp *iter, *elem = NULL; + unsigned long key = (unsigned long)dev; + struct flow_offload_xdp_ft *ft_elem; + + ft_elem = kzalloc(sizeof(*ft_elem), GFP_KERNEL_ACCOUNT); + if (!ft_elem) + return -ENOMEM; + + ft_elem->ft = ft; + + mutex_lock(&nf_xdp_hashtable_lock); + + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + elem = iter; + break; + } + } + + if (!elem) { + elem = kzalloc(sizeof(*elem), GFP_KERNEL_ACCOUNT); + if (!elem) + goto err_unlock; + + elem->net_device_addr = key; + INIT_LIST_HEAD(&elem->head); + hash_add_rcu(nf_xdp_hashtable, &elem->hnode, key); + } + list_add_tail_rcu(&ft_elem->head, &elem->head); + + mutex_unlock(&nf_xdp_hashtable_lock); + + return 0; + +err_unlock: + mutex_unlock(&nf_xdp_hashtable_lock); + kfree(ft_elem); + + return -ENOMEM; +} + +static void nf_flowtable_by_dev_remove(struct nf_flowtable *ft, + const struct net_device *dev) +{ + struct flow_offload_xdp *iter, *elem = NULL; + unsigned long key = (unsigned long)dev; + + mutex_lock(&nf_xdp_hashtable_lock); + + hash_for_each_possible(nf_xdp_hashtable, iter, hnode, key) { + if (key == iter->net_device_addr) { + elem = iter; + break; + } + } + + if (elem) { + struct flow_offload_xdp_ft *ft_elem, *ft_next; + + list_for_each_entry_safe(ft_elem, ft_next, &elem->head, head) { + if (ft_elem->ft == ft) { + list_del_rcu(&ft_elem->head); + kfree_rcu(ft_elem, rcuhead); + } + } + + if (list_empty(&elem->head)) + hash_del_rcu(&elem->hnode); + else + elem = NULL; + } + + mutex_unlock(&nf_xdp_hashtable_lock); + + if (elem) { + synchronize_rcu(); + kfree(elem); + } +} + +int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd) +{ + switch (cmd) { + case FLOW_BLOCK_BIND: + return nf_flowtable_by_dev_insert(flowtable, dev); + case FLOW_BLOCK_UNBIND: + nf_flowtable_by_dev_remove(flowtable, dev); + return 0; + } + + WARN_ON_ONCE(1); + return 0; +} -- cgit v1.2.3 From 391bb6594fd3a567efb1cd3efc8136c78c4c9e31 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 30 Jun 2024 00:26:49 +0200 Subject: netfilter: Add bpf_xdp_flow_lookup kfunc Introduce bpf_xdp_flow_lookup kfunc in order to perform the lookup of a given flowtable entry based on a fib tuple of incoming traffic. bpf_xdp_flow_lookup can be used as building block to offload in xdp the processing of sw flowtable when hw flowtable is not available. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/bpf/55d38a4e5856f6d1509d823ff4e98aaa6d356097.1719698275.git.lorenzo@kernel.org --- include/net/netfilter/nf_flow_table.h | 10 +++ net/netfilter/Makefile | 5 ++ net/netfilter/nf_flow_table_bpf.c | 121 ++++++++++++++++++++++++++++++++++ net/netfilter/nf_flow_table_inet.c | 2 +- 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 net/netfilter/nf_flow_table_bpf.c (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d845745207d2..b63d53bb9dd6 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -315,6 +315,16 @@ unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) +extern int nf_flow_register_bpf(void); +#else +static inline int nf_flow_register_bpf(void) +{ + return 0; +} +#endif + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ MODULE_ALIAS("nf-flowtable-" __stringify(family)) diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 18046872a38a..f0aa4d7ef499 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -144,6 +144,11 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \ nf_flow_table_offload.o nf_flow_table_xdp.o nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o +ifeq ($(CONFIG_NF_FLOW_TABLE),m) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_flow_table_bpf.o +else ifeq ($(CONFIG_NF_FLOW_TABLE),y) +nf_flow_table-$(CONFIG_DEBUG_INFO_BTF) += nf_flow_table_bpf.o +endif obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o diff --git a/net/netfilter/nf_flow_table_bpf.c b/net/netfilter/nf_flow_table_bpf.c new file mode 100644 index 000000000000..4a5f5195f2d2 --- /dev/null +++ b/net/netfilter/nf_flow_table_bpf.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Flow Table Helpers for XDP hook + * + * These are called from the XDP programs. + * Note that it is allowed to break compatibility for these functions since + * the interface they are exposed through to BPF programs is explicitly + * unstable. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* bpf_flowtable_opts - options for bpf flowtable helpers + * @error: out parameter, set for any encountered error + */ +struct bpf_flowtable_opts { + s32 error; +}; + +enum { + NF_BPF_FLOWTABLE_OPTS_SZ = 4, +}; + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in nf_flow_table BTF"); + +__bpf_kfunc_start_defs(); + +static struct flow_offload_tuple_rhash * +bpf_xdp_flow_tuple_lookup(struct net_device *dev, + struct flow_offload_tuple *tuple, __be16 proto) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *nf_flow_table; + struct flow_offload *nf_flow; + + nf_flow_table = nf_flowtable_by_dev(dev); + if (!nf_flow_table) + return ERR_PTR(-ENOENT); + + tuplehash = flow_offload_lookup(nf_flow_table, tuple); + if (!tuplehash) + return ERR_PTR(-ENOENT); + + nf_flow = container_of(tuplehash, struct flow_offload, + tuplehash[tuplehash->tuple.dir]); + flow_offload_refresh(nf_flow_table, nf_flow, false); + + return tuplehash; +} + +__bpf_kfunc struct flow_offload_tuple_rhash * +bpf_xdp_flow_lookup(struct xdp_md *ctx, struct bpf_fib_lookup *fib_tuple, + struct bpf_flowtable_opts *opts, u32 opts_len) +{ + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + struct flow_offload_tuple tuple = { + .iifidx = fib_tuple->ifindex, + .l3proto = fib_tuple->family, + .l4proto = fib_tuple->l4_protocol, + .src_port = fib_tuple->sport, + .dst_port = fib_tuple->dport, + }; + struct flow_offload_tuple_rhash *tuplehash; + __be16 proto; + + if (opts_len != NF_BPF_FLOWTABLE_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + + switch (fib_tuple->family) { + case AF_INET: + tuple.src_v4.s_addr = fib_tuple->ipv4_src; + tuple.dst_v4.s_addr = fib_tuple->ipv4_dst; + proto = htons(ETH_P_IP); + break; + case AF_INET6: + tuple.src_v6 = *(struct in6_addr *)&fib_tuple->ipv6_src; + tuple.dst_v6 = *(struct in6_addr *)&fib_tuple->ipv6_dst; + proto = htons(ETH_P_IPV6); + break; + default: + opts->error = -EAFNOSUPPORT; + return NULL; + } + + tuplehash = bpf_xdp_flow_tuple_lookup(xdp->rxq->dev, &tuple, proto); + if (IS_ERR(tuplehash)) { + opts->error = PTR_ERR(tuplehash); + return NULL; + } + + return tuplehash; +} + +__diag_pop() + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(nf_ft_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_flow_lookup, KF_TRUSTED_ARGS | KF_RET_NULL) +BTF_KFUNCS_END(nf_ft_kfunc_set) + +static const struct btf_kfunc_id_set nf_flow_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_ft_kfunc_set, +}; + +int nf_flow_register_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &nf_flow_kfunc_set); +} +EXPORT_SYMBOL_GPL(nf_flow_register_bpf); diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 6eef15648b7b..88787b45e30d 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -98,7 +98,7 @@ static int __init nf_flow_inet_module_init(void) nft_register_flowtable_type(&flowtable_ipv6); nft_register_flowtable_type(&flowtable_inet); - return 0; + return nf_flow_register_bpf(); } static void __exit nf_flow_inet_module_exit(void) -- cgit v1.2.3 From 060f4ba6e40338a70932603a3564903acf5f5734 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jun 2024 13:59:44 +0100 Subject: io_uring/net: move charging socket out of zc io_uring Currently, io_uring's io_sg_from_iter() duplicates the part of __zerocopy_sg_from_iter() charging pages to the socket. It'd be too easy to miss while changing it in net/, the chunk is not the most straightforward for outside users and full of internal implementation details. io_uring is not a good place to keep it, deduplicate it by moving out of the callback into __zerocopy_sg_from_iter(). Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Reviewed-by: Jens Axboe Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ include/linux/socket.h | 2 +- io_uring/net.c | 16 ++++------------ net/core/datagram.c | 10 +++++----- 4 files changed, 13 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f4cda3fbdb75..9c29bdd5596d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1703,6 +1703,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); +int zerocopy_fill_skb_from_iter(struct sk_buff *skb, + struct iov_iter *from, size_t length); + static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { diff --git a/include/linux/socket.h b/include/linux/socket.h index 89d16b90370b..2a1ff91d1914 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; diff --git a/io_uring/net.c b/io_uring/net.c index 7c98c4d50946..84a7602bcef1 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1265,14 +1265,14 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC); } -static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb, +static int io_sg_from_iter_iovec(struct sk_buff *skb, struct iov_iter *from, size_t length) { skb_zcopy_downgrade_managed(skb); - return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); + return zerocopy_fill_skb_from_iter(skb, from, length); } -static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, +static int io_sg_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length) { struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -1285,7 +1285,7 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, if (!frag) shinfo->flags |= SKBFL_MANAGED_FRAG_REFS; else if (unlikely(!skb_zcopy_managed(skb))) - return __zerocopy_sg_from_iter(NULL, sk, skb, from, length); + return zerocopy_fill_skb_from_iter(skb, from, length); bi.bi_size = min(from->count, length); bi.bi_bvec_done = from->iov_offset; @@ -1312,14 +1312,6 @@ static int io_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->data_len += copied; skb->len += copied; skb->truesize += truesize; - - if (sk && sk->sk_type == SOCK_STREAM) { - sk_wmem_queued_add(sk, truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_charge(sk, truesize); - } else { - refcount_add(truesize, &skb->sk->sk_wmem_alloc); - } return ret; } diff --git a/net/core/datagram.c b/net/core/datagram.c index b0dccefd4a09..16507b7cc4fb 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -610,8 +610,8 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); -static int zerocopy_fill_skb_from_iter(struct sk_buff *skb, - struct iov_iter *from, size_t length) +int zerocopy_fill_skb_from_iter(struct sk_buff *skb, + struct iov_iter *from, size_t length) { int frag = skb_shinfo(skb)->nr_frags; @@ -687,11 +687,11 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, int ret; if (msg && msg->msg_ubuf && msg->sg_from_iter) - return msg->sg_from_iter(sk, skb, from, length); + ret = msg->sg_from_iter(skb, from, length); + else + ret = zerocopy_fill_skb_from_iter(skb, from, length); - ret = zerocopy_fill_skb_from_iter(skb, from, length); truesize = skb->truesize - orig_size; - if (sk && sk->sk_type == SOCK_STREAM) { sk_wmem_queued_add(sk, truesize); if (!skb_zcopy_pure(skb)) -- cgit v1.2.3 From d7f39aee79f04eeaa42085728423501b33ac5be5 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 26 Jun 2024 20:01:59 -0700 Subject: page_pool: export page_pool_disable_direct_recycling() 56ef27e3 unexported page_pool_unlink_napi() and renamed it to page_pool_disable_direct_recycling(). This is because there was no in-tree user of page_pool_unlink_napi(). Since then Rx queue API and an implementation in bnxt got merged. In the bnxt implementation, it broadly follows the following steps: allocate new queue memory + page pool, stop old rx queue, swap, then destroy old queue memory + page pool. The existing NAPI instance is re-used so when the old page pool that is no longer used but still linked to this shared NAPI instance is destroyed, it will trigger warnings. In my initial patches I unlinked a page pool from a NAPI instance directly. Instead, export page_pool_disable_direct_recycling() and call that instead to avoid having a driver touch a core struct. Suggested-by: Jakub Kicinski Signed-off-by: David Wei Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/net/page_pool/types.h | 1 + net/core/page_pool.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 7e8477057f3d..9093a964fc33 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -229,6 +229,7 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL +void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 3927a0a7fa9a..5f1ed6f2ca8f 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1014,7 +1014,7 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), pool->xdp_mem_id = mem->id; } -static void page_pool_disable_direct_recycling(struct page_pool *pool) +void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. * Paired with READ_ONCE() in page_pool_napi_local(). @@ -1032,6 +1032,7 @@ static void page_pool_disable_direct_recycling(struct page_pool *pool) WRITE_ONCE(pool->p.napi, NULL); } +EXPORT_SYMBOL(page_pool_disable_direct_recycling); void page_pool_destroy(struct page_pool *pool) { -- cgit v1.2.3 From d839a73179ae91c07f5f2f97ccb9c69b2b7c3306 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 28 Jun 2024 12:18:55 +0200 Subject: net: Optimize xdp_do_flush() with bpf_net_context infos. Every NIC driver utilizing XDP should invoke xdp_do_flush() after processing all packages. With the introduction of the bpf_net_context logic the flush lists (for dev, CPU-map and xsk) are lazy initialized only if used. However xdp_do_flush() tries to flush all three of them so all three lists are always initialized and the likely empty lists are "iterated". Without the usage of XDP but with CONFIG_DEBUG_NET the lists are also initialized due to xdp_do_check_flushed(). Jakub suggest to utilize the hints in bpf_net_context and avoid invoking the flush function. This will also avoiding initializing the lists which are otherwise unused. Introduce bpf_net_ctx_get_all_used_flush_lists() to return the individual list if not-empty. Use the logic in xdp_do_flush() and xdp_do_check_flushed(). Remove the not needed .*_check_flush(). Suggested-by: Jakub Kicinski Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/bpf.h | 10 ++++------ include/linux/filter.h | 27 +++++++++++++++++++++++++++ include/net/xdp_sock.h | 14 ++------------ kernel/bpf/cpumap.c | 13 +------------ kernel/bpf/devmap.c | 13 +------------ net/core/filter.c | 33 +++++++++++++++++++++++++-------- net/xdp/xsk.c | 13 +------------ 7 files changed, 61 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f5c6bc9093a6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2494,7 +2494,7 @@ struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; -void __dev_flush(void); +void __dev_flush(struct list_head *flush_list); int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, @@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); -void __cpu_map_flush(void); +void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, @@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); -bool dev_check_flush(void); -bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } -static inline void __dev_flush(void) +static inline void __dev_flush(struct list_head *flush_list) { } @@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return 0; } -static inline void __cpu_map_flush(void) +static inline void __cpu_map_flush(struct list_head *flush_list) { } diff --git a/include/linux/filter.h b/include/linux/filter.h index c0349522de8f..02ddcfdf94c4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) return &bpf_net_ctx->xskmap_map_flush_list; } +static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map, + struct list_head **lh_dev, + struct list_head **lh_xsk) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + u32 kern_flags = bpf_net_ctx->ri.kern_flags; + struct list_head *lh; + + *lh_map = *lh_dev = *lh_xsk = NULL; + + if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) + return; + + lh = &bpf_net_ctx->dev_map_flush_list; + if (kern_flags & BPF_RI_F_DEV_MAP_INIT && !list_empty(lh)) + *lh_dev = lh; + + lh = &bpf_net_ctx->cpu_map_flush_list; + if (kern_flags & BPF_RI_F_CPU_MAP_INIT && !list_empty(lh)) + *lh_map = lh; + + lh = &bpf_net_ctx->xskmap_map_flush_list; + if (IS_ENABLED(CONFIG_XDP_SOCKETS) && + kern_flags & BPF_RI_F_XSK_MAP_INIT && !list_empty(lh)) + *lh_xsk = lh; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3d54de168a6d..bfe625b55d55 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops { int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); -void __xsk_map_flush(void); +void __xsk_map_flush(struct list_head *flush_list); /** * xsk_tx_metadata_to_compl - Save enough relevant metadata information @@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) return -EOPNOTSUPP; } -static inline void __xsk_map_flush(void) +static inline void __xsk_map_flush(struct list_head *flush_list) { } @@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, } #endif /* CONFIG_XDP_SOCKETS */ - -#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) -bool xsk_map_check_flush(void); -#else -static inline bool xsk_map_check_flush(void) -{ - return false; -} -#endif - #endif /* _LINUX_XDP_SOCK_H */ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 068e994ed781..4acf90cd79eb 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -757,9 +757,8 @@ trace: return ret; } -void __cpu_map_flush(void) +void __cpu_map_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -769,13 +768,3 @@ void __cpu_map_flush(void) wake_up_process(bq->obj->kthread); } } - -#ifdef CONFIG_DEBUG_NET -bool cpu_map_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_cpu_map_flush_list())) - return false; - __cpu_map_flush(); - return true; -} -#endif diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 317ac2d66ebd..9ca47eaacdd5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -412,9 +412,8 @@ out: * driver before returning from its napi->poll() routine. See the comment above * xdp_do_flush() in filter.c. */ -void __dev_flush(void) +void __dev_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -425,16 +424,6 @@ void __dev_flush(void) } } -#ifdef CONFIG_DEBUG_NET -bool dev_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_dev_flush_list())) - return false; - __dev_flush(); - return true; -} -#endif - /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or * by local_bh_disable() (from XDP calls inside NAPI). The * rcu_read_lock_bh_held() below makes lockdep accept both. diff --git a/net/core/filter.c b/net/core/filter.c index eb1c4425c06f..403d23faf22e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4277,22 +4277,39 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { */ void xdp_do_flush(void) { - __dev_flush(); - __cpu_map_flush(); - __xsk_map_flush(); + struct list_head *lh_map, *lh_dev, *lh_xsk; + + bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk); + if (lh_dev) + __dev_flush(lh_dev); + if (lh_map) + __cpu_map_flush(lh_map); + if (lh_xsk) + __xsk_map_flush(lh_xsk); } EXPORT_SYMBOL_GPL(xdp_do_flush); #if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) void xdp_do_check_flushed(struct napi_struct *napi) { - bool ret; + struct list_head *lh_map, *lh_dev, *lh_xsk; + bool missed = false; - ret = dev_check_flush(); - ret |= cpu_map_check_flush(); - ret |= xsk_map_check_flush(); + bpf_net_ctx_get_all_used_flush_lists(&lh_map, &lh_dev, &lh_xsk); + if (lh_dev) { + __dev_flush(lh_dev); + missed = true; + } + if (lh_map) { + __cpu_map_flush(lh_map); + missed = true; + } + if (lh_xsk) { + __xsk_map_flush(lh_xsk); + missed = true; + } - WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n", + WARN_ONCE(missed, "Missing xdp_do_flush() invocation after NAPI by %ps\n", napi->poll); } #endif diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ed062e038389..de9c0322bc29 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -383,9 +383,8 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) return 0; } -void __xsk_map_flush(void) +void __xsk_map_flush(struct list_head *flush_list) { - struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); struct xdp_sock *xs, *tmp; list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { @@ -394,16 +393,6 @@ void __xsk_map_flush(void) } } -#ifdef CONFIG_DEBUG_NET -bool xsk_map_check_flush(void) -{ - if (list_empty(bpf_net_ctx_get_xskmap_flush_list())) - return false; - __xsk_map_flush(); - return true; -} -#endif - void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { xskq_prod_submit_n(pool->cq, nb_entries); -- cgit v1.2.3 From 4dec64c52e24c2c9a15f81c115f1be5ea35121cb Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 28 Jun 2024 00:32:42 +0000 Subject: page_pool: convert to use netmem Abstract the memory type from the page_pool so we can later add support for new memory types. Convert the page_pool to use the new netmem type abstraction, rather than use struct page directly. As of this patch the netmem type is a no-op abstraction: it's always a struct page underneath. All the page pool internals are converted to use struct netmem instead of struct page, and the page pool now exports 2 APIs: 1. The existing struct page API. 2. The new struct netmem API. Keeping the existing API is transitional; we do not want to refactor all the current drivers using the page pool at once. The netmem abstraction is currently a no-op. The page_pool uses page_to_netmem() to convert allocated pages to netmem, and uses netmem_to_page() to convert the netmem back to pages to pass to mm APIs, Follow up patches to this series add non-paged netmem support to the page_pool. This change is factored out on its own to limit the code churn to this 1 patch, for ease of code review. Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Link: https://patch.msgid.link/20240628003253.1694510-6-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff_ref.h | 4 +- include/net/netmem.h | 15 ++ include/net/page_pool/helpers.h | 91 +++++++++--- include/net/page_pool/types.h | 14 +- include/trace/events/page_pool.h | 30 ++-- net/bpf/test_run.c | 5 +- net/core/page_pool.c | 304 +++++++++++++++++++++------------------ net/core/skbuff.c | 8 +- 8 files changed, 287 insertions(+), 184 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 11f0a4063403..16c241a23472 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -32,13 +32,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } -bool napi_pp_put_page(struct page *page); +bool napi_pp_put_page(netmem_ref netmem); static inline void skb_page_unref(struct page *page, bool recycle) { #ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page)) + if (recycle && napi_pp_put_page(page_to_netmem(page))) return; #endif put_page(page); diff --git a/include/net/netmem.h b/include/net/netmem.h index d8b810245c1d..46cc9b89ac79 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -38,4 +38,19 @@ static inline netmem_ref page_to_netmem(struct page *page) return (__force netmem_ref)page; } +static inline int netmem_ref_count(netmem_ref netmem) +{ + return page_ref_count(netmem_to_page(netmem)); +} + +static inline unsigned long netmem_to_pfn(netmem_ref netmem) +{ + return page_to_pfn(netmem_to_page(netmem)); +} + +static inline netmem_ref netmem_compound_head(netmem_ref netmem) +{ + return page_to_netmem(compound_head(netmem_to_page(netmem))); +} + #endif /* _NET_NETMEM_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 873631c79ab1..2b43a893c619 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -55,6 +55,8 @@ #include #include +#include +#include #ifdef CONFIG_PAGE_POOL_STATS /* Deprecated driver-facing API, use netlink instead */ @@ -212,6 +214,11 @@ page_pool_get_dma_dir(const struct page_pool *pool) return pool->p.dma_dir; } +static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr) +{ + atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr); +} + /** * page_pool_fragment_page() - split a fresh page into fragments * @page: page to split @@ -232,11 +239,12 @@ page_pool_get_dma_dir(const struct page_pool *pool) */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_ref_count, nr); + page_pool_fragment_netmem(page_to_netmem(page), nr); } -static inline long page_pool_unref_page(struct page *page, long nr) +static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) { + struct page *page = netmem_to_page(netmem); long ret; /* If nr == pp_ref_count then we have cleared all remaining @@ -279,15 +287,41 @@ static inline long page_pool_unref_page(struct page *page, long nr) return ret; } +static inline long page_pool_unref_page(struct page *page, long nr) +{ + return page_pool_unref_netmem(page_to_netmem(page), nr); +} + +static inline void page_pool_ref_netmem(netmem_ref netmem) +{ + atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count); +} + static inline void page_pool_ref_page(struct page *page) { - atomic_long_inc(&page->pp_ref_count); + page_pool_ref_netmem(page_to_netmem(page)); } -static inline bool page_pool_is_last_ref(struct page *page) +static inline bool page_pool_is_last_ref(netmem_ref netmem) { /* If page_pool_unref_page() returns 0, we were the last user */ - return page_pool_unref_page(page, 1) == 0; + return page_pool_unref_netmem(netmem, 1) == 0; +} + +static inline void page_pool_put_netmem(struct page_pool *pool, + netmem_ref netmem, + unsigned int dma_sync_size, + bool allow_direct) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + if (!page_pool_is_last_ref(netmem)) + return; + + page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct); +#endif } /** @@ -308,15 +342,15 @@ static inline void page_pool_put_page(struct page_pool *pool, unsigned int dma_sync_size, bool allow_direct) { - /* When page_pool isn't compiled-in, net/core/xdp.c doesn't - * allow registering MEM_TYPE_PAGE_POOL, but shield linker. - */ -#ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_ref(page)) - return; + page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size, + allow_direct); +} - page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct); -#endif +static inline void page_pool_put_full_netmem(struct page_pool *pool, + netmem_ref netmem, + bool allow_direct) +{ + page_pool_put_netmem(pool, netmem, -1, allow_direct); } /** @@ -331,7 +365,7 @@ static inline void page_pool_put_page(struct page_pool *pool, static inline void page_pool_put_full_page(struct page_pool *pool, struct page *page, bool allow_direct) { - page_pool_put_page(pool, page, -1, allow_direct); + page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct); } /** @@ -365,6 +399,18 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct); } +static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) +{ + struct page *page = netmem_to_page(netmem); + + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; + + return ret; +} + /** * page_pool_get_dma_addr() - Retrieve the stored DMA address. * @page: page allocated from a page pool @@ -374,16 +420,14 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - dma_addr_t ret = page->dma_addr; - - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) - ret <<= PAGE_SHIFT; - - return ret; + return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); } -static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem, + dma_addr_t addr) { + struct page *page = netmem_to_page(netmem); + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { page->dma_addr = addr >> PAGE_SHIFT; @@ -419,6 +463,11 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, page_pool_get_dma_dir(pool)); } +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +{ + return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 9093a964fc33..b70bcc14ceda 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include #include #include +#include #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA * map/unmap @@ -40,7 +41,7 @@ #define PP_ALLOC_CACHE_REFILL 64 struct pp_alloc_cache { u32 count; - struct page *cache[PP_ALLOC_CACHE_SIZE]; + netmem_ref cache[PP_ALLOC_CACHE_SIZE]; }; /** @@ -73,7 +74,7 @@ struct page_pool_params { struct net_device *netdev; unsigned int flags; /* private: used by test code only */ - void (*init_callback)(struct page *page, void *arg); + void (*init_callback)(netmem_ref netmem, void *arg); void *init_arg; ); }; @@ -151,7 +152,7 @@ struct page_pool { */ __cacheline_group_begin(frag) __aligned(4 * sizeof(long)); long frag_users; - struct page *frag_page; + netmem_ref frag_page; unsigned int frag_offset; __cacheline_group_end(frag); @@ -220,8 +221,12 @@ struct page_pool { }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); +netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, + unsigned int *offset, unsigned int size, + gfp_t gfp); struct page_pool *page_pool_create(const struct page_pool_params *params); struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, int cpuid); @@ -252,6 +257,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, + unsigned int dma_sync_size, + bool allow_direct); void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct); diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index 6834356b2d2a..543e54e432a1 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -42,51 +42,53 @@ TRACE_EVENT(page_pool_release, TRACE_EVENT(page_pool_state_release, TP_PROTO(const struct page_pool *pool, - const struct page *page, u32 release), + netmem_ref netmem, u32 release), - TP_ARGS(pool, page, release), + TP_ARGS(pool, netmem, release), TP_STRUCT__entry( __field(const struct page_pool *, pool) - __field(const struct page *, page) + __field(unsigned long, netmem) __field(u32, release) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; - __entry->page = page; + __entry->netmem = (__force unsigned long)netmem; __entry->release = release; - __entry->pfn = page_to_pfn(page); + __entry->pfn = netmem_to_pfn(netmem); ), - TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u", - __entry->pool, __entry->page, __entry->pfn, __entry->release) + TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u", + __entry->pool, (void *)__entry->netmem, + __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, TP_PROTO(const struct page_pool *pool, - const struct page *page, u32 hold), + netmem_ref netmem, u32 hold), - TP_ARGS(pool, page, hold), + TP_ARGS(pool, netmem, hold), TP_STRUCT__entry( __field(const struct page_pool *, pool) - __field(const struct page *, page) + __field(unsigned long, netmem) __field(u32, hold) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; - __entry->page = page; + __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; - __entry->pfn = page_to_pfn(page); + __entry->pfn = netmem_to_pfn(netmem); ), - TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u", - __entry->pool, __entry->page, __entry->pfn, __entry->hold) + TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u", + __entry->pool, (void *)__entry->netmem, + __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index a6d7f790cdda..26417ab34ff4 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -127,9 +127,10 @@ struct xdp_test_data { #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) #define TEST_XDP_MAX_BATCH 256 -static void xdp_test_run_init_page(struct page *page, void *arg) +static void xdp_test_run_init_page(netmem_ref netmem, void *arg) { - struct xdp_page_head *head = phys_to_virt(page_to_phys(page)); + struct xdp_page_head *head = + phys_to_virt(page_to_phys(netmem_to_page(netmem))); struct xdp_buff *new_ctx, *orig_ctx; u32 headroom = XDP_PACKET_HEADROOM; struct xdp_test_data *xdp = arg; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5f1ed6f2ca8f..dc52f078adde 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -327,19 +327,18 @@ struct page_pool *page_pool_create(const struct page_pool_params *params) } EXPORT_SYMBOL(page_pool_create); -static void page_pool_return_page(struct page_pool *pool, struct page *page); +static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem); -noinline -static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) +static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; - struct page *page; + netmem_ref netmem; int pref_nid; /* preferred NUMA node */ /* Quicker fallback, avoid locks when ring is empty */ if (__ptr_ring_empty(r)) { alloc_stat_inc(pool, empty); - return NULL; + return 0; } /* Softirq guarantee CPU and thus NUMA node is stable. This, @@ -354,57 +353,57 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool) /* Refill alloc array, but only if NUMA match */ do { - page = __ptr_ring_consume(r); - if (unlikely(!page)) + netmem = (__force netmem_ref)__ptr_ring_consume(r); + if (unlikely(!netmem)) break; - if (likely(page_to_nid(page) == pref_nid)) { - pool->alloc.cache[pool->alloc.count++] = page; + if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) { + pool->alloc.cache[pool->alloc.count++] = netmem; } else { /* NUMA mismatch; * (1) release 1 page to page-allocator and * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. */ - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); alloc_stat_inc(pool, waive); - page = NULL; + netmem = 0; break; } } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ if (likely(pool->alloc.count > 0)) { - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, refill); } - return page; + return netmem; } /* fast path */ -static struct page *__page_pool_get_cached(struct page_pool *pool) +static netmem_ref __page_pool_get_cached(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */ if (likely(pool->alloc.count)) { /* Fast-path */ - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, fast); } else { - page = page_pool_refill_alloc_cache(pool); + netmem = page_pool_refill_alloc_cache(pool); } - return page; + return netmem; } static void __page_pool_dma_sync_for_device(const struct page_pool *pool, - const struct page *page, + netmem_ref netmem, u32 dma_sync_size) { #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) - dma_addr_t dma_addr = page_pool_get_dma_addr(page); + dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_size = min(dma_sync_size, pool->p.max_len); __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset, @@ -414,14 +413,14 @@ static void __page_pool_dma_sync_for_device(const struct page_pool *pool, static __always_inline void page_pool_dma_sync_for_device(const struct page_pool *pool, - const struct page *page, + netmem_ref netmem, u32 dma_sync_size) { if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) - __page_pool_dma_sync_for_device(pool, page, dma_sync_size); + __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); } -static bool page_pool_dma_map(struct page_pool *pool, struct page *page) +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) { dma_addr_t dma; @@ -430,17 +429,17 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. */ - dma = dma_map_page_attrs(pool->p.dev, page, 0, - (PAGE_SIZE << pool->p.order), - pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | - DMA_ATTR_WEAK_ORDERING); + dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0, + (PAGE_SIZE << pool->p.order), pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); if (dma_mapping_error(pool->p.dev, dma)) return false; - if (page_pool_set_dma_addr(page, dma)) + if (page_pool_set_dma_addr_netmem(netmem, dma)) goto unmap_failed; - page_pool_dma_sync_for_device(pool, page, pool->p.max_len); + page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; @@ -452,9 +451,10 @@ unmap_failed: return false; } -static void page_pool_set_pp_info(struct page_pool *pool, - struct page *page) +static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) { + struct page *page = netmem_to_page(netmem); + page->pp = pool; page->pp_magic |= PP_SIGNATURE; @@ -464,13 +464,15 @@ static void page_pool_set_pp_info(struct page_pool *pool, * is dirtying the same cache line as the page->pp_magic above, so * the overhead is negligible. */ - page_pool_fragment_page(page, 1); + page_pool_fragment_netmem(netmem, 1); if (pool->has_init_callback) - pool->slow.init_callback(page, pool->slow.init_arg); + pool->slow.init_callback(netmem, pool->slow.init_arg); } -static void page_pool_clear_pp_info(struct page *page) +static void page_pool_clear_pp_info(netmem_ref netmem) { + struct page *page = netmem_to_page(netmem); + page->pp_magic = 0; page->pp = NULL; } @@ -485,34 +487,34 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, if (unlikely(!page)) return NULL; - if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page))) { + if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) { put_page(page); return NULL; } alloc_stat_inc(pool, slow_high_order); - page_pool_set_pp_info(pool, page); + page_pool_set_pp_info(pool, page_to_netmem(page)); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; - trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt); + trace_page_pool_state_hold(pool, page_to_netmem(page), + pool->pages_state_hold_cnt); return page; } /* slow path */ -noinline -static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, - gfp_t gfp) +static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, + gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; unsigned int pp_order = pool->p.order; bool dma_map = pool->dma_map; - struct page *page; + netmem_ref netmem; int i, nr_pages; /* Don't support bulk alloc for high-order pages */ if (unlikely(pp_order)) - return __page_pool_alloc_page_order(pool, gfp); + return page_to_netmem(__page_pool_alloc_page_order(pool, gfp)); /* Unnecessary as alloc cache is empty, but guarantees zero count */ if (unlikely(pool->alloc.count > 0)) @@ -521,56 +523,63 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); - nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk, - pool->alloc.cache); + nr_pages = alloc_pages_bulk_array_node(gfp, + pool->p.nid, bulk, + (struct page **)pool->alloc.cache); if (unlikely(!nr_pages)) - return NULL; + return 0; /* Pages have been filled into alloc.cache array, but count is zero and * page element have not been (possibly) DMA mapped. */ for (i = 0; i < nr_pages; i++) { - page = pool->alloc.cache[i]; - if (dma_map && unlikely(!page_pool_dma_map(pool, page))) { - put_page(page); + netmem = pool->alloc.cache[i]; + if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) { + put_page(netmem_to_page(netmem)); continue; } - page_pool_set_pp_info(pool, page); - pool->alloc.cache[pool->alloc.count++] = page; + page_pool_set_pp_info(pool, netmem); + pool->alloc.cache[pool->alloc.count++] = netmem; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; - trace_page_pool_state_hold(pool, page, + trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); } /* Return last page */ if (likely(pool->alloc.count > 0)) { - page = pool->alloc.cache[--pool->alloc.count]; + netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, slow); } else { - page = NULL; + netmem = 0; } /* When page just alloc'ed is should/must have refcnt 1. */ - return page; + return netmem; } /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. */ -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) { - struct page *page; + netmem_ref netmem; /* Fast-path: Get a page from cache */ - page = __page_pool_get_cached(pool); - if (page) - return page; + netmem = __page_pool_get_cached(pool); + if (netmem) + return netmem; /* Slow-path: cache empty, do real allocation */ - page = __page_pool_alloc_pages_slow(pool, gfp); - return page; + netmem = __page_pool_alloc_pages_slow(pool, gfp); + return netmem; +} +EXPORT_SYMBOL(page_pool_alloc_netmem); + +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_netmem(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); @@ -599,8 +608,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict) return inflight; } -static __always_inline -void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) +static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, + netmem_ref netmem) { dma_addr_t dma; @@ -610,13 +619,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) */ return; - dma = page_pool_get_dma_addr(page); + dma = page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); - page_pool_set_dma_addr(page, 0); + page_pool_set_dma_addr_netmem(netmem, 0); } /* Disconnects a page (from a page_pool). API users can have a need @@ -624,35 +633,34 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page) * a regular page (that will eventually be returned to the normal * page-allocator via put_page). */ -void page_pool_return_page(struct page_pool *pool, struct page *page) +void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) { int count; - __page_pool_release_page_dma(pool, page); - - page_pool_clear_pp_info(page); + __page_pool_release_page_dma(pool, netmem); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. */ count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); - trace_page_pool_state_release(pool, page, count); + trace_page_pool_state_release(pool, netmem, count); - put_page(page); + page_pool_clear_pp_info(netmem); + put_page(netmem_to_page(netmem)); /* An optimization would be to call __free_pages(page, pool->p.order) * knowing page is not part of page-cache (thus avoiding a * __page_cache_release() call). */ } -static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) +static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem) { int ret; /* BH protection not needed if current is softirq */ if (in_softirq()) - ret = ptr_ring_produce(&pool->ring, page); + ret = ptr_ring_produce(&pool->ring, (__force void *)netmem); else - ret = ptr_ring_produce_bh(&pool->ring, page); + ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem); if (!ret) { recycle_stat_inc(pool, ring); @@ -667,7 +675,7 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) * * Caller must provide appropriate safe context. */ -static bool page_pool_recycle_in_cache(struct page *page, +static bool page_pool_recycle_in_cache(netmem_ref netmem, struct page_pool *pool) { if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { @@ -676,14 +684,15 @@ static bool page_pool_recycle_in_cache(struct page *page, } /* Caller MUST have verified/know (page_ref_count(page) == 1) */ - pool->alloc.cache[pool->alloc.count++] = page; + pool->alloc.cache[pool->alloc.count++] = netmem; recycle_stat_inc(pool, cached); return true; } -static bool __page_pool_page_can_be_recycled(const struct page *page) +static bool __page_pool_page_can_be_recycled(netmem_ref netmem) { - return page_ref_count(page) == 1 && !page_is_pfmemalloc(page); + return page_ref_count(netmem_to_page(netmem)) == 1 && + !page_is_pfmemalloc(netmem_to_page(netmem)); } /* If the page refcnt == 1, this will try to recycle the page. @@ -692,8 +701,8 @@ static bool __page_pool_page_can_be_recycled(const struct page *page) * If the page refcnt != 1, then the page will be returned to memory * subsystem. */ -static __always_inline struct page * -__page_pool_put_page(struct page_pool *pool, struct page *page, +static __always_inline netmem_ref +__page_pool_put_page(struct page_pool *pool, netmem_ref netmem, unsigned int dma_sync_size, bool allow_direct) { lockdep_assert_no_hardirq(); @@ -707,16 +716,16 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * page is NOT reusable when allocated when system is under * some pressure. (page_is_pfmemalloc) */ - if (likely(__page_pool_page_can_be_recycled(page))) { + if (likely(__page_pool_page_can_be_recycled(netmem))) { /* Read barrier done in page_ref_count / READ_ONCE */ - page_pool_dma_sync_for_device(pool, page, dma_sync_size); + page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); - if (allow_direct && page_pool_recycle_in_cache(page, pool)) - return NULL; + if (allow_direct && page_pool_recycle_in_cache(netmem, pool)) + return 0; /* Page found as candidate for recycling */ - return page; + return netmem; } /* Fallback/non-XDP mode: API user have elevated refcnt. * @@ -732,9 +741,9 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * will be invoking put_page. */ recycle_stat_inc(pool, released_refcnt); - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); - return NULL; + return 0; } static bool page_pool_napi_local(const struct page_pool *pool) @@ -760,19 +769,28 @@ static bool page_pool_napi_local(const struct page_pool *pool) return napi && READ_ONCE(napi->list_owner) == cpuid; } -void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, bool allow_direct) +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, + unsigned int dma_sync_size, bool allow_direct) { if (!allow_direct) allow_direct = page_pool_napi_local(pool); - page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct); - if (page && !page_pool_recycle_in_ring(pool, page)) { + netmem = + __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct); + if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); } } +EXPORT_SYMBOL(page_pool_put_unrefed_netmem); + +void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, bool allow_direct) +{ + page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size, + allow_direct); +} EXPORT_SYMBOL(page_pool_put_unrefed_page); /** @@ -800,16 +818,16 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, allow_direct = page_pool_napi_local(pool); for (i = 0; i < count; i++) { - struct page *page = virt_to_head_page(data[i]); + netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i])); /* It is not the last user for the page frag case */ - if (!page_pool_is_last_ref(page)) + if (!page_pool_is_last_ref(netmem)) continue; - page = __page_pool_put_page(pool, page, -1, allow_direct); + netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ - if (page) - data[bulk_len++] = page; + if (netmem) + data[bulk_len++] = (__force void *)netmem; } if (!bulk_len) @@ -835,98 +853,106 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, * since put_page() with refcnt == 1 can be an expensive operation */ for (; i < bulk_len; i++) - page_pool_return_page(pool, data[i]); + page_pool_return_page(pool, (__force netmem_ref)data[i]); } EXPORT_SYMBOL(page_pool_put_page_bulk); -static struct page *page_pool_drain_frag(struct page_pool *pool, - struct page *page) +static netmem_ref page_pool_drain_frag(struct page_pool *pool, + netmem_ref netmem) { long drain_count = BIAS_MAX - pool->frag_users; /* Some user is still using the page frag */ - if (likely(page_pool_unref_page(page, drain_count))) - return NULL; + if (likely(page_pool_unref_netmem(netmem, drain_count))) + return 0; - if (__page_pool_page_can_be_recycled(page)) { - page_pool_dma_sync_for_device(pool, page, -1); - return page; + if (__page_pool_page_can_be_recycled(netmem)) { + page_pool_dma_sync_for_device(pool, netmem, -1); + return netmem; } - page_pool_return_page(pool, page); - return NULL; + page_pool_return_page(pool, netmem); + return 0; } static void page_pool_free_frag(struct page_pool *pool) { long drain_count = BIAS_MAX - pool->frag_users; - struct page *page = pool->frag_page; + netmem_ref netmem = pool->frag_page; - pool->frag_page = NULL; + pool->frag_page = 0; - if (!page || page_pool_unref_page(page, drain_count)) + if (!netmem || page_pool_unref_netmem(netmem, drain_count)) return; - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); } -struct page *page_pool_alloc_frag(struct page_pool *pool, - unsigned int *offset, - unsigned int size, gfp_t gfp) +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, + unsigned int *offset, unsigned int size, + gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; - struct page *page = pool->frag_page; + netmem_ref netmem = pool->frag_page; if (WARN_ON(size > max_size)) - return NULL; + return 0; size = ALIGN(size, dma_get_cache_alignment()); *offset = pool->frag_offset; - if (page && *offset + size > max_size) { - page = page_pool_drain_frag(pool, page); - if (page) { + if (netmem && *offset + size > max_size) { + netmem = page_pool_drain_frag(pool, netmem); + if (netmem) { alloc_stat_inc(pool, fast); goto frag_reset; } } - if (!page) { - page = page_pool_alloc_pages(pool, gfp); - if (unlikely(!page)) { - pool->frag_page = NULL; - return NULL; + if (!netmem) { + netmem = page_pool_alloc_netmem(pool, gfp); + if (unlikely(!netmem)) { + pool->frag_page = 0; + return 0; } - pool->frag_page = page; + pool->frag_page = netmem; frag_reset: pool->frag_users = 1; *offset = 0; pool->frag_offset = size; - page_pool_fragment_page(page, BIAS_MAX); - return page; + page_pool_fragment_netmem(netmem, BIAS_MAX); + return netmem; } pool->frag_users++; pool->frag_offset = *offset + size; alloc_stat_inc(pool, fast); - return page; + return netmem; +} +EXPORT_SYMBOL(page_pool_alloc_frag_netmem); + +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size, + gfp)); } EXPORT_SYMBOL(page_pool_alloc_frag); static void page_pool_empty_ring(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; /* Empty recycle ring */ - while ((page = ptr_ring_consume_bh(&pool->ring))) { + while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) { /* Verify the refcnt invariant of cached pages */ - if (!(page_ref_count(page) == 1)) + if (!(page_ref_count(netmem_to_page(netmem)) == 1)) pr_crit("%s() page_pool refcnt %d violation\n", - __func__, page_ref_count(page)); + __func__, netmem_ref_count(netmem)); - page_pool_return_page(pool, page); + page_pool_return_page(pool, netmem); } } @@ -942,7 +968,7 @@ static void __page_pool_destroy(struct page_pool *pool) static void page_pool_empty_alloc_cache_once(struct page_pool *pool) { - struct page *page; + netmem_ref netmem; if (pool->destroy_cnt) return; @@ -952,8 +978,8 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool) * call concurrently. */ while (pool->alloc.count) { - page = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, page); + netmem = pool->alloc.cache[--pool->alloc.count]; + page_pool_return_page(pool, netmem); } } @@ -1060,15 +1086,15 @@ EXPORT_SYMBOL(page_pool_destroy); /* Caller must provide appropriate safe context, e.g. NAPI. */ void page_pool_update_nid(struct page_pool *pool, int new_nid) { - struct page *page; + netmem_ref netmem; trace_page_pool_update_nid(pool, new_nid); pool->p.nid = new_nid; /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { - page = pool->alloc.cache[--pool->alloc.count]; - page_pool_return_page(pool, page); + netmem = pool->alloc.cache[--pool->alloc.count]; + page_pool_return_page(pool, netmem); } } EXPORT_SYMBOL(page_pool_update_nid); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0ed4d00d258c..83f8cd8aa2d1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1015,8 +1015,10 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) -bool napi_pp_put_page(struct page *page) +bool napi_pp_put_page(netmem_ref netmem) { + struct page *page = netmem_to_page(netmem); + page = compound_head(page); /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation @@ -1029,7 +1031,7 @@ bool napi_pp_put_page(struct page *page) if (unlikely(!is_pp_page(page))) return false; - page_pool_put_full_page(page->pp, page, false); + page_pool_put_full_netmem(page->pp, page_to_netmem(page), false); return true; } @@ -1040,7 +1042,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) { if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle) return false; - return napi_pp_put_page(virt_to_page(data)); + return napi_pp_put_page(page_to_netmem(virt_to_page(data))); } /** -- cgit v1.2.3 From caa93b7c25945d302689de07bd404655db93ae6e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 3 Jul 2024 13:18:49 +0100 Subject: ethtool: move firmware flashing flag to struct ethtool_netdev_state Commit 31e0aa99dc02 ("ethtool: Veto some operations during firmware flashing process") added a flag module_fw_flash_in_progress to struct net_device. As this is ethtool related state, move it to the recently created struct ethtool_netdev_state, accessed via the 'ethtool' member of struct net_device. Suggested-by: Jakub Kicinski Signed-off-by: Edward Cree Reviewed-by: Michal Kubiak Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20240703121849.652893-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ include/linux/netdevice.h | 3 --- net/ethtool/eeprom.c | 2 +- net/ethtool/ioctl.c | 8 ++++---- net/ethtool/module.c | 10 +++++----- net/ethtool/netlink.c | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f74bb0cf8ed1..3a99238ef895 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1107,11 +1107,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. * @wol_enabled: Wake-on-LAN is enabled + * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; }; struct phy_device; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3c719f0d5f5a..93558645c6d0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1989,8 +1989,6 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * - * @module_fw_flash_in_progress: Module firmware flashing is in progress. - * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2376,7 +2374,6 @@ struct net_device { bool proto_down; bool threaded; - unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index f36811b3ecf1..3b8209e930fd 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -91,7 +91,7 @@ static int get_module_eeprom_by_page(struct net_device *dev, { const struct ethtool_ops *ops = dev->ethtool_ops; - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Module firmware flashing is in progress"); return -EBUSY; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 46f0497ae6bc..d72b0fec89af 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -658,7 +658,7 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; memset(&link_ksettings, 0, sizeof(link_ksettings)); @@ -1572,7 +1572,7 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) if (!dev->ethtool_ops->reset) return -EOPNOTSUPP; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; if (copy_from_user(&reset, useraddr, sizeof(reset))) @@ -2588,7 +2588,7 @@ int ethtool_get_module_info_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; if (dev->sfp_bus) @@ -2628,7 +2628,7 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; - if (dev->module_fw_flash_in_progress) + if (dev->ethtool->module_fw_flash_in_progress) return -EBUSY; if (dev->sfp_bus) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 6b7448df08d5..aba78436d350 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -37,7 +37,7 @@ static int module_get_power_mode(struct net_device *dev, if (!ops->get_module_power_mode) return 0; - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Module firmware flashing is in progress"); return -EBUSY; @@ -119,7 +119,7 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) return 0; - if (req_info->dev->module_fw_flash_in_progress) { + if (req_info->dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(info->extack, "Module firmware flashing is in progress"); return -EBUSY; @@ -226,7 +226,7 @@ static void module_flash_fw_work(struct work_struct *work) ethtool_cmis_fw_update(&module_fw->fw_update); module_flash_fw_work_list_del(&module_fw->list); - module_fw->fw_update.dev->module_fw_flash_in_progress = false; + module_fw->fw_update.dev->ethtool->module_fw_flash_in_progress = false; netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker); release_firmware(module_fw->fw_update.fw); kfree(module_fw); @@ -318,7 +318,7 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; - dev->module_fw_flash_in_progress = true; + dev->ethtool->module_fw_flash_in_progress = true; netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); fw_update->dev = dev; fw_update->ntf_params.portid = info->snd_portid; @@ -385,7 +385,7 @@ static int ethnl_module_fw_flash_validate(struct net_device *dev, return -EOPNOTSUPP; } - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress"); return -EBUSY; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 81fe2e5b95f6..cb1eea00e349 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -807,7 +807,7 @@ static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, ethnl_notify_features(ptr); break; case NETDEV_PRE_UP: - if (dev->module_fw_flash_in_progress) { + if (dev->ethtool->module_fw_flash_in_progress) { NL_SET_ERR_MSG(extack, "Can't set port up while flashing module firmware"); return NOTIFY_BAD; } -- cgit v1.2.3 From f37bee9508885ce5edd4875c744e1af28cfac76c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:32 +0300 Subject: net: pcs: xpcs: Move native device ID macro to linux/pcs/pcs-xpcs.h One of the next commits will alter the DW XPCS driver to support setting a custom device ID for the particular MDIO-device detected on the platform. The generic DW XPCS ID can be used as a custom ID as well in case if the DW XPCS-device was erroneously synthesized with no or some undefined ID. In addition to that having all supported DW XPCS device IDs defined in a single place will improve the code maintainability and readability. Note while at it rename the macros to being shorter and looking alike to the already defined NXP XPCS ID macro. Signed-off-by: Serge Semin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.c | 8 ++++---- drivers/net/pcs/pcs-xpcs.h | 3 --- include/linux/pcs/pcs-xpcs.h | 2 ++ 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 31525fe9c32e..99adbf15ab36 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1343,16 +1343,16 @@ static const struct xpcs_compat nxp_sja1110_xpcs_compat[DW_XPCS_INTERFACE_MAX] = static const struct xpcs_id xpcs_id_list[] = { { - .id = SYNOPSYS_XPCS_ID, - .mask = SYNOPSYS_XPCS_MASK, + .id = DW_XPCS_ID, + .mask = DW_XPCS_ID_MASK, .compat = synopsys_xpcs_compat, }, { .id = NXP_SJA1105_XPCS_ID, - .mask = SYNOPSYS_XPCS_MASK, + .mask = DW_XPCS_ID_MASK, .compat = nxp_sja1105_xpcs_compat, }, { .id = NXP_SJA1110_XPCS_ID, - .mask = SYNOPSYS_XPCS_MASK, + .mask = DW_XPCS_ID_MASK, .compat = nxp_sja1110_xpcs_compat, }, }; diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index 96c36b32ca99..369e9196f45a 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -6,9 +6,6 @@ * Author: Jose Abreu */ -#define SYNOPSYS_XPCS_ID 0x7996ced0 -#define SYNOPSYS_XPCS_MASK 0xffffffff - /* Vendor regs access */ #define DW_VENDOR BIT(15) diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index da3a6c30f6d2..8dfe90295f12 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -12,6 +12,8 @@ #define NXP_SJA1105_XPCS_ID 0x00000010 #define NXP_SJA1110_XPCS_ID 0x00000020 +#define DW_XPCS_ID 0x7996ced0 +#define DW_XPCS_ID_MASK 0xffffffff /* AN mode */ #define DW_AN_C73 1 -- cgit v1.2.3 From 71b200b388ef2ff1b547114f17dc1fd088bfc2c6 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:34 +0300 Subject: net: pcs: xpcs: Convert xpcs_id to dw_xpcs_desc A structure with the PCS/PMA MMD IDs data is being introduced in one of the next commits. In order to prevent the names ambiguity let's convert the xpcs_id structure name to dw_xpcs_desc. The later version is more suitable since the structure content is indeed the device descriptor containing the data and callbacks required for the driver to correctly set the device up. Signed-off-by: Serge Semin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.c | 32 ++++++++++++++++---------------- include/linux/pcs/pcs-xpcs.h | 4 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 2dcfd0ff069a..4ed29df8c963 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -151,19 +151,19 @@ struct xpcs_compat { int (*pma_config)(struct dw_xpcs *xpcs); }; -struct xpcs_id { +struct dw_xpcs_desc { u32 id; u32 mask; const struct xpcs_compat *compat; }; -static const struct xpcs_compat *xpcs_find_compat(const struct xpcs_id *id, - phy_interface_t interface) +static const struct xpcs_compat * +xpcs_find_compat(const struct dw_xpcs_desc *desc, phy_interface_t interface) { int i, j; for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) { - const struct xpcs_compat *compat = &id->compat[i]; + const struct xpcs_compat *compat = &desc->compat[i]; for (j = 0; j < compat->num_interfaces; j++) if (compat->interface[j] == interface) @@ -177,7 +177,7 @@ int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface) { const struct xpcs_compat *compat; - compat = xpcs_find_compat(xpcs->id, interface); + compat = xpcs_find_compat(xpcs->desc, interface); if (!compat) return -ENODEV; @@ -612,7 +612,7 @@ static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported, int i; xpcs = phylink_pcs_to_xpcs(pcs); - compat = xpcs_find_compat(xpcs->id, state->interface); + compat = xpcs_find_compat(xpcs->desc, state->interface); if (!compat) return -EINVAL; @@ -633,7 +633,7 @@ void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) int i, j; for (i = 0; i < DW_XPCS_INTERFACE_MAX; i++) { - const struct xpcs_compat *compat = &xpcs->id->compat[i]; + const struct xpcs_compat *compat = &xpcs->desc->compat[i]; for (j = 0; j < compat->num_interfaces; j++) __set_bit(compat->interface[j], interfaces); @@ -853,7 +853,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, const struct xpcs_compat *compat; int ret; - compat = xpcs_find_compat(xpcs->id, interface); + compat = xpcs_find_compat(xpcs->desc, interface); if (!compat) return -ENODEV; @@ -1118,7 +1118,7 @@ static void xpcs_get_state(struct phylink_pcs *pcs, const struct xpcs_compat *compat; int ret; - compat = xpcs_find_compat(xpcs->id, state->interface); + compat = xpcs_find_compat(xpcs->desc, state->interface); if (!compat) return; @@ -1341,7 +1341,7 @@ static const struct xpcs_compat nxp_sja1110_xpcs_compat[DW_XPCS_INTERFACE_MAX] = }, }; -static const struct xpcs_id xpcs_id_list[] = { +static const struct dw_xpcs_desc xpcs_desc_list[] = { { .id = DW_XPCS_ID, .mask = DW_XPCS_ID_MASK, @@ -1395,18 +1395,18 @@ static int xpcs_init_id(struct dw_xpcs *xpcs) xpcs_id = xpcs_get_id(xpcs); - for (i = 0; i < ARRAY_SIZE(xpcs_id_list); i++) { - const struct xpcs_id *entry = &xpcs_id_list[i]; + for (i = 0; i < ARRAY_SIZE(xpcs_desc_list); i++) { + const struct dw_xpcs_desc *desc = &xpcs_desc_list[i]; - if ((xpcs_id & entry->mask) != entry->id) + if ((xpcs_id & desc->mask) != desc->id) continue; - xpcs->id = entry; + xpcs->desc = desc; break; } - if (!xpcs->id) + if (!xpcs->desc) return -ENODEV; ret = xpcs_dev_flag(xpcs); @@ -1420,7 +1420,7 @@ static int xpcs_init_iface(struct dw_xpcs *xpcs, phy_interface_t interface) { const struct xpcs_compat *compat; - compat = xpcs_find_compat(xpcs->id, interface); + compat = xpcs_find_compat(xpcs->desc, interface); if (!compat) return -EINVAL; diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 8dfe90295f12..e706bd16b986 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -28,11 +28,11 @@ /* dev_flag */ #define DW_DEV_TXGBE BIT(0) -struct xpcs_id; +struct dw_xpcs_desc; struct dw_xpcs { + const struct dw_xpcs_desc *desc; struct mdio_device *mdiodev; - const struct xpcs_id *id; struct phylink_pcs pcs; phy_interface_t interface; int dev_flag; -- cgit v1.2.3 From bcac735cf653ef8dc6d7c08ed311e0a77f2665f0 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:36 +0300 Subject: net: pcs: xpcs: Introduce DW XPCS info structure The being introduced structure will preserve the PCS and PMA IDs retrieved from the respective DW XPCS MMDs or potentially pre-defined by the client drivers. (The later change will be introduced later in the framework of the commit adding the memory-mapped DW XPCS devices support.) The structure fields are filled in in the xpcs_get_id() function, which used to be responsible for the PCS Device ID getting only. Besides of the PCS ID the method now fetches the PMA/PMD IDs too from MMD 1, which used to be done in xpcs_dev_flag(). The retrieved PMA ID will be from now utilized for the PMA-specific tweaks like it was introduced for the Wangxun TxGBE PCS in the commit f629acc6f210 ("net: pcs: xpcs: support to switch mode for Wangxun NICs"). Note 1. The xpcs_get_id() error-handling semantics has been changed. From now the error number will be returned from the function. There is no point in the next IOs or saving 0xffs and then looping over the actual device IDs if device couldn't be reached. -ENODEV will be returned if the very first IO operation failed thus indicating that no device could be found. Note 2. The PCS and PMA IDs macros have been converted to enum'es. The enum'es will be populated later in another commit with the virtual IDs identifying the DW XPCS devices which have some platform-specifics, but have been synthesized with the default PCS/PMA ID. Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.c | 104 +++++++++++++++++++++---------------------- include/linux/pcs/pcs-xpcs.h | 28 +++++++----- 2 files changed, 67 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index eefb5e1dbe21..6397dac82d1f 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -237,29 +237,6 @@ int xpcs_write_vpcs(struct dw_xpcs *xpcs, int reg, u16 val) return xpcs_write_vendor(xpcs, MDIO_MMD_PCS, reg, val); } -static int xpcs_dev_flag(struct dw_xpcs *xpcs) -{ - int ret, oui; - - ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID1); - if (ret < 0) - return ret; - - oui = ret; - - ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID2); - if (ret < 0) - return ret; - - ret = (ret >> 10) & 0x3F; - oui |= ret << 16; - - if (oui == DW_OUI_WX) - xpcs->dev_flag = DW_DEV_TXGBE; - - return 0; -} - static int xpcs_poll_reset(struct dw_xpcs *xpcs, int dev) { /* Poll until the reset bit clears (50ms per retry == 0.6 sec) */ @@ -684,7 +661,7 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, { int ret, mdio_ctrl, tx_conf; - if (xpcs->dev_flag == DW_DEV_TXGBE) + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) xpcs_write_vpcs(xpcs, DW_VR_XS_PCS_DIG_CTRL1, DW_CL37_BP | DW_EN_VSMMD1); /* For AN for C37 SGMII mode, the settings are :- @@ -722,7 +699,7 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, ret |= (DW_VR_MII_PCS_MODE_C37_SGMII << DW_VR_MII_AN_CTRL_PCS_MODE_SHIFT & DW_VR_MII_PCS_MODE_MASK); - if (xpcs->dev_flag == DW_DEV_TXGBE) { + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) { ret |= DW_VR_MII_AN_CTRL_8BIT; /* Hardware requires it to be PHY side SGMII */ tx_conf = DW_VR_MII_TX_CONFIG_PHY_SIDE_SGMII; @@ -744,7 +721,7 @@ static int xpcs_config_aneg_c37_sgmii(struct dw_xpcs *xpcs, else ret &= ~DW_VR_MII_DIG_CTRL1_MAC_AUTO_SW; - if (xpcs->dev_flag == DW_DEV_TXGBE) + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) ret |= DW_VR_MII_DIG_CTRL1_PHY_MODE_CTRL; ret = xpcs_write(xpcs, MDIO_MMD_VEND2, DW_VR_MII_DIG_CTRL1, ret); @@ -766,7 +743,7 @@ static int xpcs_config_aneg_c37_1000basex(struct dw_xpcs *xpcs, int ret, mdio_ctrl, adv; bool changed = 0; - if (xpcs->dev_flag == DW_DEV_TXGBE) + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) xpcs_write_vpcs(xpcs, DW_VR_XS_PCS_DIG_CTRL1, DW_CL37_BP | DW_EN_VSMMD1); /* According to Chap 7.12, to set 1000BASE-X C37 AN, AN must @@ -857,7 +834,7 @@ int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, if (!compat) return -ENODEV; - if (xpcs->dev_flag == DW_DEV_TXGBE) { + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) { ret = txgbe_xpcs_switch_mode(xpcs, interface); if (ret) return ret; @@ -1229,44 +1206,66 @@ static void xpcs_an_restart(struct phylink_pcs *pcs) } } -static u32 xpcs_get_id(struct dw_xpcs *xpcs) +static int xpcs_get_id(struct dw_xpcs *xpcs) { int ret; u32 id; - /* First, search C73 PCS using PCS MMD */ + /* First, search C73 PCS using PCS MMD 3. Return ENODEV if communication + * failed indicating that device couldn't be reached. + */ ret = xpcs_read(xpcs, MDIO_MMD_PCS, MII_PHYSID1); if (ret < 0) - return 0xffffffff; + return -ENODEV; id = ret << 16; ret = xpcs_read(xpcs, MDIO_MMD_PCS, MII_PHYSID2); if (ret < 0) - return 0xffffffff; + return ret; - /* If Device IDs are not all zeros or all ones, - * we found C73 AN-type device + id |= ret; + + /* If Device IDs are not all zeros or ones, then 10GBase-X/R or C73 + * KR/KX4 PCS found. Otherwise fallback to detecting 1000Base-X or C37 + * PCS in MII MMD 31. */ - if ((id | ret) && (id | ret) != 0xffffffff) - return id | ret; + if (!id || id == 0xffffffff) { + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID1); + if (ret < 0) + return ret; + + id = ret << 16; + + ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID2); + if (ret < 0) + return ret; - /* Next, search C37 PCS using Vendor-Specific MII MMD */ - ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID1); + id |= ret; + } + + xpcs->info.pcs = id; + + /* Find out PMA/PMD ID from MMD 1 device ID registers */ + ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID1); if (ret < 0) - return 0xffffffff; + return ret; - id = ret << 16; + id = ret; - ret = xpcs_read(xpcs, MDIO_MMD_VEND2, MII_PHYSID2); + ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID2); if (ret < 0) - return 0xffffffff; + return ret; + + /* Note the inverted dword order and masked out Model/Revision numbers + * with respect to what is done with the PCS ID... + */ + ret = (ret >> 10) & 0x3F; + id |= ret << 16; - /* If Device IDs are not all zeros, we found C37 AN-type device */ - if (id | ret) - return id | ret; + xpcs->info.pma = id; - return 0xffffffff; + return 0; } static const struct dw_xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = { @@ -1390,15 +1389,16 @@ static void xpcs_free_data(struct dw_xpcs *xpcs) static int xpcs_init_id(struct dw_xpcs *xpcs) { - u32 xpcs_id; int i, ret; - xpcs_id = xpcs_get_id(xpcs); + ret = xpcs_get_id(xpcs); + if (ret < 0) + return ret; for (i = 0; i < ARRAY_SIZE(xpcs_desc_list); i++) { const struct dw_xpcs_desc *desc = &xpcs_desc_list[i]; - if ((xpcs_id & desc->mask) != desc->id) + if ((xpcs->info.pcs & desc->mask) != desc->id) continue; xpcs->desc = desc; @@ -1409,10 +1409,6 @@ static int xpcs_init_id(struct dw_xpcs *xpcs) if (!xpcs->desc) return -ENODEV; - ret = xpcs_dev_flag(xpcs); - if (ret < 0) - return ret; - return 0; } @@ -1424,7 +1420,7 @@ static int xpcs_init_iface(struct dw_xpcs *xpcs, phy_interface_t interface) if (!compat) return -EINVAL; - if (xpcs->dev_flag == DW_DEV_TXGBE) { + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) { xpcs->pcs.poll = false; return 0; } diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index e706bd16b986..1dc60f5e653f 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -9,11 +9,7 @@ #include #include - -#define NXP_SJA1105_XPCS_ID 0x00000010 -#define NXP_SJA1110_XPCS_ID 0x00000020 -#define DW_XPCS_ID 0x7996ced0 -#define DW_XPCS_ID_MASK 0xffffffff +#include /* AN mode */ #define DW_AN_C73 1 @@ -22,20 +18,30 @@ #define DW_AN_C37_1000BASEX 4 #define DW_10GBASER 5 -/* device vendor OUI */ -#define DW_OUI_WX 0x0018fc80 +struct dw_xpcs_desc; -/* dev_flag */ -#define DW_DEV_TXGBE BIT(0) +enum dw_xpcs_pcs_id { + NXP_SJA1105_XPCS_ID = 0x00000010, + NXP_SJA1110_XPCS_ID = 0x00000020, + DW_XPCS_ID = 0x7996ced0, + DW_XPCS_ID_MASK = 0xffffffff, +}; -struct dw_xpcs_desc; +enum dw_xpcs_pma_id { + WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80, +}; + +struct dw_xpcs_info { + u32 pcs; + u32 pma; +}; struct dw_xpcs { + struct dw_xpcs_info info; const struct dw_xpcs_desc *desc; struct mdio_device *mdiodev; struct phylink_pcs pcs; phy_interface_t interface; - int dev_flag; }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -- cgit v1.2.3 From f6bb3e9d98c2e8d70587d5ddaf9426ef30d7865c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:38 +0300 Subject: net: pcs: xpcs: Add Synopsys DW xPCS platform device driver Synopsys DesignWare XPCS IP-core can be synthesized with the device CSRs being accessible over the MCI or APB3 interface instead of the MDIO bus (see the CSR_INTERFACE HDL parameter). Thus all the PCS registers can be just memory mapped and be a subject of the standard MMIO operations of course taking into account the peculiarities of the Clause C45 CSRs mapping. From that perspective the DW XPCS devices would look as just normal platform devices for the kernel. On the other hand in order to have the DW XPCS devices handled by the pcs-xpcs.c driver they need to be registered in the framework of the MDIO-subsystem. So the suggested change is about providing a DW XPCS platform device driver registering a virtual MDIO-bus with a single MDIO-device representing the DW XPCS device. DW XPCS platform device is supposed to be described by the respective compatible string "snps,dw-xpcs" (or with the PMA-specific compatible string), CSRs memory space and optional peripheral bus and reference clock sources. Depending on the INDIRECT_ACCESS IP-core synthesize parameter the memory-mapped reg-space can be represented as either directly or indirectly mapped Clause 45 space. In the former case the particular address is determined based on the MMD device and the registers offset (5 + 16 bits all together) within the device reg-space. In the later case there is only 8 lower address bits are utilized for the registers mapping (255 CSRs). The upper bits are supposed to be written into the respective viewport CSR in order to select the respective MMD sub-page. Note, only the peripheral bus clock source is requested in the platform device probe procedure. The core and pad clocks handling has been implemented in the framework of the xpcs_create() method intentionally since the clocks-related setups are supposed to be performed later, during the DW XPCS main configuration procedures. (For instance they will be required for the DW Gen5 10G PMA configuration.) Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- drivers/net/pcs/Kconfig | 6 +- drivers/net/pcs/Makefile | 3 +- drivers/net/pcs/pcs-xpcs-plat.c | 460 ++++++++++++++++++++++++++++++++++++++++ drivers/net/pcs/pcs-xpcs.c | 63 +++++- drivers/net/pcs/pcs-xpcs.h | 6 + include/linux/pcs/pcs-xpcs.h | 18 ++ 6 files changed, 547 insertions(+), 9 deletions(-) create mode 100644 drivers/net/pcs/pcs-xpcs-plat.c (limited to 'include') diff --git a/drivers/net/pcs/Kconfig b/drivers/net/pcs/Kconfig index 87cf308fc6d8..f6aa437473de 100644 --- a/drivers/net/pcs/Kconfig +++ b/drivers/net/pcs/Kconfig @@ -6,11 +6,11 @@ menu "PCS device drivers" config PCS_XPCS - tristate + tristate "Synopsys DesignWare Ethernet XPCS" select PHYLINK help - This module provides helper functions for Synopsys DesignWare XPCS - controllers. + This module provides a driver and helper functions for Synopsys + DesignWare XPCS controllers. config PCS_LYNX tristate diff --git a/drivers/net/pcs/Makefile b/drivers/net/pcs/Makefile index fb1694192ae6..4f7920618b90 100644 --- a/drivers/net/pcs/Makefile +++ b/drivers/net/pcs/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for Linux PCS drivers -pcs_xpcs-$(CONFIG_PCS_XPCS) := pcs-xpcs.o pcs-xpcs-nxp.o pcs-xpcs-wx.o +pcs_xpcs-$(CONFIG_PCS_XPCS) := pcs-xpcs.o pcs-xpcs-plat.o \ + pcs-xpcs-nxp.o pcs-xpcs-wx.o obj-$(CONFIG_PCS_XPCS) += pcs_xpcs.o obj-$(CONFIG_PCS_LYNX) += pcs-lynx.o diff --git a/drivers/net/pcs/pcs-xpcs-plat.c b/drivers/net/pcs/pcs-xpcs-plat.c new file mode 100644 index 000000000000..629315f1e57c --- /dev/null +++ b/drivers/net/pcs/pcs-xpcs-plat.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Synopsys DesignWare XPCS platform device driver + * + * Copyright (C) 2024 Serge Semin + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pcs-xpcs.h" + +/* Page select register for the indirect MMIO CSRs access */ +#define DW_VR_CSR_VIEWPORT 0xff + +struct dw_xpcs_plat { + struct platform_device *pdev; + struct mii_bus *bus; + bool reg_indir; + int reg_width; + void __iomem *reg_base; + struct clk *cclk; +}; + +static ptrdiff_t xpcs_mmio_addr_format(int dev, int reg) +{ + return FIELD_PREP(0x1f0000, dev) | FIELD_PREP(0xffff, reg); +} + +static u16 xpcs_mmio_addr_page(ptrdiff_t csr) +{ + return FIELD_GET(0x1fff00, csr); +} + +static ptrdiff_t xpcs_mmio_addr_offset(ptrdiff_t csr) +{ + return FIELD_GET(0xff, csr); +} + +static int xpcs_mmio_read_reg_indirect(struct dw_xpcs_plat *pxpcs, + int dev, int reg) +{ + ptrdiff_t csr, ofs; + u16 page; + int ret; + + csr = xpcs_mmio_addr_format(dev, reg); + page = xpcs_mmio_addr_page(csr); + ofs = xpcs_mmio_addr_offset(csr); + + ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev); + if (ret) + return ret; + + switch (pxpcs->reg_width) { + case 4: + writel(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 2)); + ret = readl(pxpcs->reg_base + (ofs << 2)); + break; + default: + writew(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 1)); + ret = readw(pxpcs->reg_base + (ofs << 1)); + break; + } + + pm_runtime_put(&pxpcs->pdev->dev); + + return ret; +} + +static int xpcs_mmio_write_reg_indirect(struct dw_xpcs_plat *pxpcs, + int dev, int reg, u16 val) +{ + ptrdiff_t csr, ofs; + u16 page; + int ret; + + csr = xpcs_mmio_addr_format(dev, reg); + page = xpcs_mmio_addr_page(csr); + ofs = xpcs_mmio_addr_offset(csr); + + ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev); + if (ret) + return ret; + + switch (pxpcs->reg_width) { + case 4: + writel(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 2)); + writel(val, pxpcs->reg_base + (ofs << 2)); + break; + default: + writew(page, pxpcs->reg_base + (DW_VR_CSR_VIEWPORT << 1)); + writew(val, pxpcs->reg_base + (ofs << 1)); + break; + } + + pm_runtime_put(&pxpcs->pdev->dev); + + return 0; +} + +static int xpcs_mmio_read_reg_direct(struct dw_xpcs_plat *pxpcs, + int dev, int reg) +{ + ptrdiff_t csr; + int ret; + + csr = xpcs_mmio_addr_format(dev, reg); + + ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev); + if (ret) + return ret; + + switch (pxpcs->reg_width) { + case 4: + ret = readl(pxpcs->reg_base + (csr << 2)); + break; + default: + ret = readw(pxpcs->reg_base + (csr << 1)); + break; + } + + pm_runtime_put(&pxpcs->pdev->dev); + + return ret; +} + +static int xpcs_mmio_write_reg_direct(struct dw_xpcs_plat *pxpcs, + int dev, int reg, u16 val) +{ + ptrdiff_t csr; + int ret; + + csr = xpcs_mmio_addr_format(dev, reg); + + ret = pm_runtime_resume_and_get(&pxpcs->pdev->dev); + if (ret) + return ret; + + switch (pxpcs->reg_width) { + case 4: + writel(val, pxpcs->reg_base + (csr << 2)); + break; + default: + writew(val, pxpcs->reg_base + (csr << 1)); + break; + } + + pm_runtime_put(&pxpcs->pdev->dev); + + return 0; +} + +static int xpcs_mmio_read_c22(struct mii_bus *bus, int addr, int reg) +{ + struct dw_xpcs_plat *pxpcs = bus->priv; + + if (addr != 0) + return -ENODEV; + + if (pxpcs->reg_indir) + return xpcs_mmio_read_reg_indirect(pxpcs, MDIO_MMD_VEND2, reg); + else + return xpcs_mmio_read_reg_direct(pxpcs, MDIO_MMD_VEND2, reg); +} + +static int xpcs_mmio_write_c22(struct mii_bus *bus, int addr, int reg, u16 val) +{ + struct dw_xpcs_plat *pxpcs = bus->priv; + + if (addr != 0) + return -ENODEV; + + if (pxpcs->reg_indir) + return xpcs_mmio_write_reg_indirect(pxpcs, MDIO_MMD_VEND2, reg, val); + else + return xpcs_mmio_write_reg_direct(pxpcs, MDIO_MMD_VEND2, reg, val); +} + +static int xpcs_mmio_read_c45(struct mii_bus *bus, int addr, int dev, int reg) +{ + struct dw_xpcs_plat *pxpcs = bus->priv; + + if (addr != 0) + return -ENODEV; + + if (pxpcs->reg_indir) + return xpcs_mmio_read_reg_indirect(pxpcs, dev, reg); + else + return xpcs_mmio_read_reg_direct(pxpcs, dev, reg); +} + +static int xpcs_mmio_write_c45(struct mii_bus *bus, int addr, int dev, + int reg, u16 val) +{ + struct dw_xpcs_plat *pxpcs = bus->priv; + + if (addr != 0) + return -ENODEV; + + if (pxpcs->reg_indir) + return xpcs_mmio_write_reg_indirect(pxpcs, dev, reg, val); + else + return xpcs_mmio_write_reg_direct(pxpcs, dev, reg, val); +} + +static struct dw_xpcs_plat *xpcs_plat_create_data(struct platform_device *pdev) +{ + struct dw_xpcs_plat *pxpcs; + + pxpcs = devm_kzalloc(&pdev->dev, sizeof(*pxpcs), GFP_KERNEL); + if (!pxpcs) + return ERR_PTR(-ENOMEM); + + pxpcs->pdev = pdev; + + dev_set_drvdata(&pdev->dev, pxpcs); + + return pxpcs; +} + +static int xpcs_plat_init_res(struct dw_xpcs_plat *pxpcs) +{ + struct platform_device *pdev = pxpcs->pdev; + struct device *dev = &pdev->dev; + resource_size_t spc_size; + struct resource *res; + + if (!device_property_read_u32(dev, "reg-io-width", &pxpcs->reg_width)) { + if (pxpcs->reg_width != 2 && pxpcs->reg_width != 4) { + dev_err(dev, "Invalid reg-space data width\n"); + return -EINVAL; + } + } else { + pxpcs->reg_width = 2; + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "direct") ?: + platform_get_resource_byname(pdev, IORESOURCE_MEM, "indirect"); + if (!res) { + dev_err(dev, "No reg-space found\n"); + return -EINVAL; + } + + if (!strcmp(res->name, "indirect")) + pxpcs->reg_indir = true; + + if (pxpcs->reg_indir) + spc_size = pxpcs->reg_width * SZ_256; + else + spc_size = pxpcs->reg_width * SZ_2M; + + if (resource_size(res) < spc_size) { + dev_err(dev, "Invalid reg-space size\n"); + return -EINVAL; + } + + pxpcs->reg_base = devm_ioremap_resource(dev, res); + if (IS_ERR(pxpcs->reg_base)) { + dev_err(dev, "Failed to map reg-space\n"); + return PTR_ERR(pxpcs->reg_base); + } + + return 0; +} + +static int xpcs_plat_init_clk(struct dw_xpcs_plat *pxpcs) +{ + struct device *dev = &pxpcs->pdev->dev; + int ret; + + pxpcs->cclk = devm_clk_get(dev, "csr"); + if (IS_ERR(pxpcs->cclk)) + return dev_err_probe(dev, PTR_ERR(pxpcs->cclk), + "Failed to get CSR clock\n"); + + pm_runtime_set_active(dev); + ret = devm_pm_runtime_enable(dev); + if (ret) { + dev_err(dev, "Failed to enable runtime-PM\n"); + return ret; + } + + return 0; +} + +static int xpcs_plat_init_bus(struct dw_xpcs_plat *pxpcs) +{ + struct device *dev = &pxpcs->pdev->dev; + static atomic_t id = ATOMIC_INIT(-1); + int ret; + + pxpcs->bus = devm_mdiobus_alloc_size(dev, 0); + if (!pxpcs->bus) + return -ENOMEM; + + pxpcs->bus->name = "DW XPCS MCI/APB3"; + pxpcs->bus->read = xpcs_mmio_read_c22; + pxpcs->bus->write = xpcs_mmio_write_c22; + pxpcs->bus->read_c45 = xpcs_mmio_read_c45; + pxpcs->bus->write_c45 = xpcs_mmio_write_c45; + pxpcs->bus->phy_mask = ~0; + pxpcs->bus->parent = dev; + pxpcs->bus->priv = pxpcs; + + snprintf(pxpcs->bus->id, MII_BUS_ID_SIZE, + "dwxpcs-%x", atomic_inc_return(&id)); + + /* MDIO-bus here serves as just a back-end engine abstracting out + * the MDIO and MCI/APB3 IO interfaces utilized for the DW XPCS CSRs + * access. + */ + ret = devm_mdiobus_register(dev, pxpcs->bus); + if (ret) { + dev_err(dev, "Failed to create MDIO bus\n"); + return ret; + } + + return 0; +} + +/* Note there is no need in the next function antagonist because the MDIO-bus + * de-registration will effectively remove and destroy all the MDIO-devices + * registered on the bus. + */ +static int xpcs_plat_init_dev(struct dw_xpcs_plat *pxpcs) +{ + struct device *dev = &pxpcs->pdev->dev; + struct mdio_device *mdiodev; + int ret; + + /* There is a single memory-mapped DW XPCS device */ + mdiodev = mdio_device_create(pxpcs->bus, 0); + if (IS_ERR(mdiodev)) + return PTR_ERR(mdiodev); + + /* Associate the FW-node with the device structure so it can be looked + * up later. Make sure DD-core is aware of the OF-node being re-used. + */ + device_set_node(&mdiodev->dev, fwnode_handle_get(dev_fwnode(dev))); + mdiodev->dev.of_node_reused = true; + + /* Pass the data further so the DW XPCS driver core could use it */ + mdiodev->dev.platform_data = (void *)device_get_match_data(dev); + + ret = mdio_device_register(mdiodev); + if (ret) { + dev_err(dev, "Failed to register MDIO device\n"); + goto err_clean_data; + } + + return 0; + +err_clean_data: + mdiodev->dev.platform_data = NULL; + + fwnode_handle_put(dev_fwnode(&mdiodev->dev)); + device_set_node(&mdiodev->dev, NULL); + + mdio_device_free(mdiodev); + + return ret; +} + +static int xpcs_plat_probe(struct platform_device *pdev) +{ + struct dw_xpcs_plat *pxpcs; + int ret; + + pxpcs = xpcs_plat_create_data(pdev); + if (IS_ERR(pxpcs)) + return PTR_ERR(pxpcs); + + ret = xpcs_plat_init_res(pxpcs); + if (ret) + return ret; + + ret = xpcs_plat_init_clk(pxpcs); + if (ret) + return ret; + + ret = xpcs_plat_init_bus(pxpcs); + if (ret) + return ret; + + ret = xpcs_plat_init_dev(pxpcs); + if (ret) + return ret; + + return 0; +} + +static int __maybe_unused xpcs_plat_pm_runtime_suspend(struct device *dev) +{ + struct dw_xpcs_plat *pxpcs = dev_get_drvdata(dev); + + clk_disable_unprepare(pxpcs->cclk); + + return 0; +} + +static int __maybe_unused xpcs_plat_pm_runtime_resume(struct device *dev) +{ + struct dw_xpcs_plat *pxpcs = dev_get_drvdata(dev); + + return clk_prepare_enable(pxpcs->cclk); +} + +static const struct dev_pm_ops xpcs_plat_pm_ops = { + SET_RUNTIME_PM_OPS(xpcs_plat_pm_runtime_suspend, + xpcs_plat_pm_runtime_resume, + NULL) +}; + +DW_XPCS_INFO_DECLARE(xpcs_generic, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_ID_NATIVE); +DW_XPCS_INFO_DECLARE(xpcs_pma_gen1_3g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN1_3G_ID); +DW_XPCS_INFO_DECLARE(xpcs_pma_gen2_3g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN2_3G_ID); +DW_XPCS_INFO_DECLARE(xpcs_pma_gen2_6g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN2_6G_ID); +DW_XPCS_INFO_DECLARE(xpcs_pma_gen4_3g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN4_3G_ID); +DW_XPCS_INFO_DECLARE(xpcs_pma_gen4_6g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN4_6G_ID); +DW_XPCS_INFO_DECLARE(xpcs_pma_gen5_10g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN5_10G_ID); +DW_XPCS_INFO_DECLARE(xpcs_pma_gen5_12g, DW_XPCS_ID_NATIVE, DW_XPCS_PMA_GEN5_12G_ID); + +static const struct of_device_id xpcs_of_ids[] = { + { .compatible = "snps,dw-xpcs", .data = &xpcs_generic }, + { .compatible = "snps,dw-xpcs-gen1-3g", .data = &xpcs_pma_gen1_3g }, + { .compatible = "snps,dw-xpcs-gen2-3g", .data = &xpcs_pma_gen2_3g }, + { .compatible = "snps,dw-xpcs-gen2-6g", .data = &xpcs_pma_gen2_6g }, + { .compatible = "snps,dw-xpcs-gen4-3g", .data = &xpcs_pma_gen4_3g }, + { .compatible = "snps,dw-xpcs-gen4-6g", .data = &xpcs_pma_gen4_6g }, + { .compatible = "snps,dw-xpcs-gen5-10g", .data = &xpcs_pma_gen5_10g }, + { .compatible = "snps,dw-xpcs-gen5-12g", .data = &xpcs_pma_gen5_12g }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, xpcs_of_ids); + +static struct platform_driver xpcs_plat_driver = { + .probe = xpcs_plat_probe, + .driver = { + .name = "dwxpcs", + .pm = &xpcs_plat_pm_ops, + .of_match_table = xpcs_of_ids, + }, +}; +module_platform_driver(xpcs_plat_driver); + +MODULE_DESCRIPTION("Synopsys DesignWare XPCS platform device driver"); +MODULE_AUTHOR("Signed-off-by: Serge Semin "); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 6397dac82d1f..f4425a7c74d5 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -6,6 +6,7 @@ * Author: Jose Abreu */ +#include #include #include #include @@ -1244,7 +1245,9 @@ static int xpcs_get_id(struct dw_xpcs *xpcs) id |= ret; } - xpcs->info.pcs = id; + /* Set the PCS ID if it hasn't been pre-initialized */ + if (xpcs->info.pcs == DW_XPCS_ID_NATIVE) + xpcs->info.pcs = id; /* Find out PMA/PMD ID from MMD 1 device ID registers */ ret = xpcs_read(xpcs, MDIO_MMD_PMAPMD, MDIO_DEVID1); @@ -1263,7 +1266,9 @@ static int xpcs_get_id(struct dw_xpcs *xpcs) ret = (ret >> 10) & 0x3F; id |= ret << 16; - xpcs->info.pma = id; + /* Set the PMA ID if it hasn't been pre-initialized */ + if (xpcs->info.pma == DW_XPCS_PMA_ID_NATIVE) + xpcs->info.pma = id; return 0; } @@ -1387,10 +1392,49 @@ static void xpcs_free_data(struct dw_xpcs *xpcs) kfree(xpcs); } +static int xpcs_init_clks(struct dw_xpcs *xpcs) +{ + static const char *ids[DW_XPCS_NUM_CLKS] = { + [DW_XPCS_CORE_CLK] = "core", + [DW_XPCS_PAD_CLK] = "pad", + }; + struct device *dev = &xpcs->mdiodev->dev; + int ret, i; + + for (i = 0; i < DW_XPCS_NUM_CLKS; ++i) + xpcs->clks[i].id = ids[i]; + + ret = clk_bulk_get_optional(dev, DW_XPCS_NUM_CLKS, xpcs->clks); + if (ret) + return dev_err_probe(dev, ret, "Failed to get clocks\n"); + + ret = clk_bulk_prepare_enable(DW_XPCS_NUM_CLKS, xpcs->clks); + if (ret) + return dev_err_probe(dev, ret, "Failed to enable clocks\n"); + + return 0; +} + +static void xpcs_clear_clks(struct dw_xpcs *xpcs) +{ + clk_bulk_disable_unprepare(DW_XPCS_NUM_CLKS, xpcs->clks); + + clk_bulk_put(DW_XPCS_NUM_CLKS, xpcs->clks); +} + static int xpcs_init_id(struct dw_xpcs *xpcs) { + const struct dw_xpcs_info *info; int i, ret; + info = dev_get_platdata(&xpcs->mdiodev->dev); + if (!info) { + xpcs->info.pcs = DW_XPCS_ID_NATIVE; + xpcs->info.pma = DW_XPCS_PMA_ID_NATIVE; + } else { + xpcs->info = *info; + } + ret = xpcs_get_id(xpcs); if (ret < 0) return ret; @@ -1438,17 +1482,24 @@ static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev, if (IS_ERR(xpcs)) return xpcs; + ret = xpcs_init_clks(xpcs); + if (ret) + goto out_free_data; + ret = xpcs_init_id(xpcs); if (ret) - goto out; + goto out_clear_clks; ret = xpcs_init_iface(xpcs, interface); if (ret) - goto out; + goto out_clear_clks; return xpcs; -out: +out_clear_clks: + xpcs_clear_clks(xpcs); + +out_free_data: xpcs_free_data(xpcs); return ERR_PTR(ret); @@ -1483,6 +1534,8 @@ void xpcs_destroy(struct dw_xpcs *xpcs) if (!xpcs) return; + xpcs_clear_clks(xpcs); + xpcs_free_data(xpcs); } EXPORT_SYMBOL_GPL(xpcs_destroy); diff --git a/drivers/net/pcs/pcs-xpcs.h b/drivers/net/pcs/pcs-xpcs.h index 369e9196f45a..fa05adfae220 100644 --- a/drivers/net/pcs/pcs-xpcs.h +++ b/drivers/net/pcs/pcs-xpcs.h @@ -6,6 +6,9 @@ * Author: Jose Abreu */ +#include +#include + /* Vendor regs access */ #define DW_VENDOR BIT(15) @@ -117,6 +120,9 @@ /* VR MII EEE Control 1 defines */ #define DW_VR_MII_EEE_TRN_LPI BIT(0) /* Transparent Mode Enable */ +#define DW_XPCS_INFO_DECLARE(_name, _pcs, _pma) \ + static const struct dw_xpcs_info _name = { .pcs = _pcs, .pma = _pma } + int xpcs_read(struct dw_xpcs *xpcs, int dev, u32 reg); int xpcs_write(struct dw_xpcs *xpcs, int dev, u32 reg, u16 val); int xpcs_read_vpcs(struct dw_xpcs *xpcs, int reg); diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 1dc60f5e653f..813be644647f 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -7,6 +7,8 @@ #ifndef __LINUX_PCS_XPCS_H #define __LINUX_PCS_XPCS_H +#include +#include #include #include #include @@ -21,6 +23,7 @@ struct dw_xpcs_desc; enum dw_xpcs_pcs_id { + DW_XPCS_ID_NATIVE = 0, NXP_SJA1105_XPCS_ID = 0x00000010, NXP_SJA1110_XPCS_ID = 0x00000020, DW_XPCS_ID = 0x7996ced0, @@ -28,6 +31,14 @@ enum dw_xpcs_pcs_id { }; enum dw_xpcs_pma_id { + DW_XPCS_PMA_ID_NATIVE = 0, + DW_XPCS_PMA_GEN1_3G_ID, + DW_XPCS_PMA_GEN2_3G_ID, + DW_XPCS_PMA_GEN2_6G_ID, + DW_XPCS_PMA_GEN4_3G_ID, + DW_XPCS_PMA_GEN4_6G_ID, + DW_XPCS_PMA_GEN5_10G_ID, + DW_XPCS_PMA_GEN5_12G_ID, WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80, }; @@ -36,10 +47,17 @@ struct dw_xpcs_info { u32 pma; }; +enum dw_xpcs_clock { + DW_XPCS_CORE_CLK, + DW_XPCS_PAD_CLK, + DW_XPCS_NUM_CLKS, +}; + struct dw_xpcs { struct dw_xpcs_info info; const struct dw_xpcs_desc *desc; struct mdio_device *mdiodev; + struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; struct phylink_pcs pcs; phy_interface_t interface; }; -- cgit v1.2.3 From 9cad7275463ab9e52aeae8d2ecb169afee6876f3 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:39 +0300 Subject: net: pcs: xpcs: Add fwnode-based descriptor creation method It's now possible to have the DW XPCS device defined as a standard platform device for instance in the platform DT-file. Although that functionality is useless unless there is a way to have the device found by the client drivers (STMMAC/DW *MAC, NXP SJA1105 Eth Switch, etc). Provide such ability by means of the xpcs_create_fwnode() method. It needs to be called with the device DW XPCS fwnode instance passed. That node will be then used to find the MDIO-device instance in order to create the DW XPCS descriptor. Note the method semantics and name is similar to what has been recently introduced in the Lynx PCS driver. Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- drivers/net/pcs/pcs-xpcs.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pcs/pcs-xpcs.h | 3 +++ 2 files changed, 53 insertions(+) (limited to 'include') diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index f4425a7c74d5..82463f9d50c8 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -10,7 +10,9 @@ #include #include #include +#include #include +#include #include "pcs-xpcs.h" @@ -1505,6 +1507,16 @@ out_free_data: return ERR_PTR(ret); } +/** + * xpcs_create_mdiodev() - create a DW xPCS instance with the MDIO @addr + * @bus: pointer to the MDIO-bus descriptor for the device to be looked at + * @addr: device MDIO-bus ID + * @interface: requested PHY interface + * + * Return: a pointer to the DW XPCS handle if successful, otherwise -ENODEV if + * the PCS device couldn't be found on the bus and other negative errno related + * to the data allocation and MDIO-bus communications. + */ struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, phy_interface_t interface) { @@ -1529,6 +1541,44 @@ struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, } EXPORT_SYMBOL_GPL(xpcs_create_mdiodev); +/** + * xpcs_create_fwnode() - Create a DW xPCS instance from @fwnode + * @fwnode: fwnode handle poining to the DW XPCS device + * @interface: requested PHY interface + * + * Return: a pointer to the DW XPCS handle if successful, otherwise -ENODEV if + * the fwnode device is unavailable or the PCS device couldn't be found on the + * bus, -EPROBE_DEFER if the respective MDIO-device instance couldn't be found, + * other negative errno related to the data allocations and MDIO-bus + * communications. + */ +struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, + phy_interface_t interface) +{ + struct mdio_device *mdiodev; + struct dw_xpcs *xpcs; + + if (!fwnode_device_is_available(fwnode)) + return ERR_PTR(-ENODEV); + + mdiodev = fwnode_mdio_find_device(fwnode); + if (!mdiodev) + return ERR_PTR(-EPROBE_DEFER); + + xpcs = xpcs_create(mdiodev, interface); + + /* xpcs_create() has taken a refcount on the mdiodev if it was + * successful. If xpcs_create() fails, this will free the mdio + * device here. In any case, we don't need to hold our reference + * anymore, and putting it here will allow mdio_device_put() in + * xpcs_destroy() to automatically free the mdio device. + */ + mdio_device_put(mdiodev); + + return xpcs; +} +EXPORT_SYMBOL_GPL(xpcs_create_fwnode); + void xpcs_destroy(struct dw_xpcs *xpcs) { if (!xpcs) diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 813be644647f..b4a4eb6c8866 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -8,6 +8,7 @@ #define __LINUX_PCS_XPCS_H #include +#include #include #include #include @@ -72,6 +73,8 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, phy_interface_t interface); +struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, + phy_interface_t interface); void xpcs_destroy(struct dw_xpcs *xpcs); #endif /* __LINUX_PCS_XPCS_H */ -- cgit v1.2.3 From 351066bad6ade5ad0f5f90fde2dc759759959969 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:40 +0300 Subject: net: stmmac: Create DW XPCS device with particular address Currently the only STMMAC platform driver using the DW XPCS code is the Intel mGBE device driver. (It can be determined by finding all the drivers having the stmmac_mdio_bus_data::has_xpcs flag set.) At the same time the low-level platform driver masks out the DW XPCS MDIO-address from being auto-detected as PHY by the MDIO subsystem core. Seeing the PCS MDIO ID is known the procedure of the DW XPCS device creation can be simplified by dropping the loop over all the MDIO IDs. From now the DW XPCS device descriptor will be created for the MDIO-bus address pre-defined by the platform drivers via the stmmac_mdio_bus_data::pcs_mask field. Note besides this shall speed up a bit the Intel mGBE probing. Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 19 ++++++------------- include/linux/stmmac.h | 2 +- 3 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 094d34c4193c..2dbfbca606af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -595,7 +595,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, /* Intel mgbe SGMII interface uses pcs-xcps */ if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII || plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) { - plat->mdio_bus_data->has_xpcs = true; + plat->mdio_bus_data->pcs_mask = BIT(INTEL_MGBE_XPCS_ADDR); plat->mdio_bus_data->default_an_inband = true; plat->select_pcs = intel_mgbe_select_pcs; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index aa43117134d3..74de6ec00bbf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -499,8 +499,7 @@ int stmmac_pcs_setup(struct net_device *ndev) { struct dw_xpcs *xpcs = NULL; struct stmmac_priv *priv; - int ret = -ENODEV; - int mode, addr; + int addr, mode, ret; priv = netdev_priv(ndev); mode = priv->plat->phy_interface; @@ -508,16 +507,10 @@ int stmmac_pcs_setup(struct net_device *ndev) if (priv->plat->pcs_init) { ret = priv->plat->pcs_init(priv); } else if (priv->plat->mdio_bus_data && - priv->plat->mdio_bus_data->has_xpcs) { - /* Try to probe the XPCS by scanning all addresses */ - for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - xpcs = xpcs_create_mdiodev(priv->mii, addr, mode); - if (IS_ERR(xpcs)) - continue; - - ret = 0; - break; - } + priv->plat->mdio_bus_data->pcs_mask) { + addr = ffs(priv->plat->mdio_bus_data->pcs_mask) - 1; + xpcs = xpcs_create_mdiodev(priv->mii, addr, mode); + ret = PTR_ERR_OR_ZERO(xpcs); } else { return 0; } @@ -610,7 +603,7 @@ int stmmac_mdio_register(struct net_device *ndev) snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x", new_bus->name, priv->plat->bus_id); new_bus->priv = ndev; - new_bus->phy_mask = mdio_bus_data->phy_mask; + new_bus->phy_mask = mdio_bus_data->phy_mask | mdio_bus_data->pcs_mask; new_bus->parent = priv->device; err = of_mdiobus_register(new_bus, mdio_node); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 9c54f82901a1..84e13bd5df28 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -82,7 +82,7 @@ struct stmmac_priv; struct stmmac_mdio_bus_data { unsigned int phy_mask; - unsigned int has_xpcs; + unsigned int pcs_mask; unsigned int default_an_inband; int *irqs; int probed_phy_irq; -- cgit v1.2.3 From 093b0f366567aa3fed85c316f832607069202b23 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:52 +0200 Subject: net: psample: add user cookie Add a user cookie to the sample metadata so that sample emitters can provide more contextual information to samples. If present, send the user cookie in a new attribute: PSAMPLE_ATTR_USER_COOKIE. Reviewed-by: Michal Kubiak Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-2-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 2 ++ include/uapi/linux/psample.h | 1 + net/psample/psample.c | 9 ++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/psample.h b/include/net/psample.h index 0509d2d6be67..2ac71260a546 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -25,6 +25,8 @@ struct psample_metadata { out_tc_occ_valid:1, latency_valid:1, unused:5; + const u8 *user_cookie; + u32 user_cookie_len; }; struct psample_group *psample_group_get(struct net *net, u32 group_num); diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e585db5bf2d2..e80637e1d97b 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -19,6 +19,7 @@ enum { PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ + PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ __PSAMPLE_ATTR_MAX }; diff --git a/net/psample/psample.c b/net/psample/psample.c index a5d9b8446f77..b37488f426bc 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -386,7 +386,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, nla_total_size(sizeof(u32)) + /* group_num */ nla_total_size(sizeof(u32)) + /* seq */ nla_total_size_64bit(sizeof(u64)) + /* timestamp */ - nla_total_size(sizeof(u16)); /* protocol */ + nla_total_size(sizeof(u16)) + /* protocol */ + (md->user_cookie_len ? + nla_total_size(md->user_cookie_len) : 0); /* user cookie */ #ifdef CONFIG_INET tun_info = skb_tunnel_info(skb); @@ -486,6 +488,11 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, } #endif + if (md->user_cookie && md->user_cookie_len && + nla_put(nl_skb, PSAMPLE_ATTR_USER_COOKIE, md->user_cookie_len, + md->user_cookie)) + goto error; + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); -- cgit v1.2.3 From 7b1b2b60c63f070e0dfbe072ccaae13168b38d01 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:55 +0200 Subject: net: psample: allow using rate as probability Although not explicitly documented in the psample module itself, the definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample. Quoting tc-sample(8): "RATE of 100 will lead to an average of one sampled packet out of every 100 observed." With this semantics, the rates that we can express with an unsigned 32-bits number are very unevenly distributed and concentrated towards "sampling few packets". For example, we can express a probability of 2.32E-8% but we cannot express anything between 100% and 50%. For sampling applications that are capable of sampling a decent amount of packets, this sampling rate semantics is not very useful. Add a new flag to the uAPI that indicates that the sampling rate is expressed in scaled probability, this is: - 0 is 0% probability, no packets get sampled. - U32_MAX is 100% probability, all packets get sampled. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-5-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 3 ++- include/uapi/linux/psample.h | 10 +++++++++- net/psample/psample.c | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/psample.h b/include/net/psample.h index 2ac71260a546..c52e9ebd88dd 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -24,7 +24,8 @@ struct psample_metadata { u8 out_tc_valid:1, out_tc_occ_valid:1, latency_valid:1, - unused:5; + rate_as_probability:1, + unused:4; const u8 *user_cookie; u32 user_cookie_len; }; diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e80637e1d97b..b765f0e81f20 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -8,7 +8,11 @@ enum { PSAMPLE_ATTR_ORIGSIZE, PSAMPLE_ATTR_SAMPLE_GROUP, PSAMPLE_ATTR_GROUP_SEQ, - PSAMPLE_ATTR_SAMPLE_RATE, + PSAMPLE_ATTR_SAMPLE_RATE, /* u32, ratio between observed and + * sampled packets or scaled probability + * if PSAMPLE_ATTR_SAMPLE_PROBABILITY + * is set. + */ PSAMPLE_ATTR_DATA, PSAMPLE_ATTR_GROUP_REFCOUNT, PSAMPLE_ATTR_TUNNEL, @@ -20,6 +24,10 @@ enum { PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ + PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in + * PSAMPLE_ATTR_SAMPLE_RATE as a + * probability scaled 0 - U32_MAX. + */ __PSAMPLE_ATTR_MAX }; diff --git a/net/psample/psample.c b/net/psample/psample.c index 1c76f3e48dcd..f48b5b9cd409 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -497,6 +497,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, md->user_cookie)) goto error; + if (md->rate_as_probability) + nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); -- cgit v1.2.3 From aae0b82b46cb5004bdf82a000c004d69a0885c33 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:56 +0200 Subject: net: openvswitch: add psample action Add support for a new action: psample. This action accepts a u32 group id and a variable-length cookie and uses the psample multicast group to make the packet available for observability. The maximum length of the user-defined cookie is set to 16, same as tc_cookie, to discourage using cookies that will not be offloadable. Reviewed-by: Michal Kubiak Reviewed-by: Aaron Conole Reviewed-by: Ilya Maximets Acked-by: Eelco Chaudron Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-6-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ovs_flow.yaml | 17 +++++++++++ include/uapi/linux/openvswitch.h | 28 ++++++++++++++++++ net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 48 +++++++++++++++++++++++++++++++ net/openvswitch/flow_netlink.c | 32 ++++++++++++++++++++- 5 files changed, 125 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 4fdfc6b5cae9..46f5d1cd8a5f 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -727,6 +727,12 @@ attribute-sets: name: dec-ttl type: nest nested-attributes: dec-ttl-attrs + - + name: psample + type: nest + nested-attributes: psample-attrs + doc: | + Sends a packet sample to psample for external observation. - name: tunnel-key-attrs enum-name: ovs-tunnel-key-attr @@ -938,6 +944,17 @@ attribute-sets: - name: gbp type: u32 + - + name: psample-attrs + enum-name: ovs-psample-attr + name-prefix: ovs-psample-attr- + attributes: + - + name: group + type: u32 + - + name: cookie + type: binary operations: name-prefix: ovs-flow-cmd- diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index efc82c318fa2..3dd653748725 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -914,6 +914,31 @@ struct check_pkt_len_arg { }; #endif +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16 +/** + * enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE + * action. + * + * @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the + * sample. + * @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that + * contains user-defined metadata. The maximum length is + * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes. + * + * Sends the packet to the psample multicast group with the specified group and + * cookie. It is possible to combine this action with the + * %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample. + */ +enum ovs_psample_attr { + OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number. */ + OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie. */ + + /* private: */ + __OVS_PSAMPLE_ATTR_MAX +}; + +#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1) + /** * enum ovs_action_attr - Action types. * @@ -966,6 +991,8 @@ struct check_pkt_len_arg { * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. * @OVS_ACTION_ATTR_DROP: Explicit drop action. + * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers + * via psample. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1004,6 +1031,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ OVS_ACTION_ATTR_DROP, /* u32 error code. */ + OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 29a7081858cd..2535f3f9f462 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -10,6 +10,7 @@ config OPENVSWITCH (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ (!NF_NAT || NF_NAT) && \ (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) + depends on PSAMPLE || !PSAMPLE select LIBCRC32C select MPLS select NET_MPLS_GSO diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 964225580824..892d7e48fc5b 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -24,6 +24,11 @@ #include #include #include + +#if IS_ENABLED(CONFIG_PSAMPLE) +#include +#endif + #include #include "datapath.h" @@ -1299,6 +1304,40 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) return 0; } +#if IS_ENABLED(CONFIG_PSAMPLE) +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{ + struct psample_group psample_group = {}; + struct psample_metadata md = {}; + const struct nlattr *a; + int rem; + + nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(a)) { + case OVS_PSAMPLE_ATTR_GROUP: + psample_group.group_num = nla_get_u32(a); + break; + + case OVS_PSAMPLE_ATTR_COOKIE: + md.user_cookie = nla_data(a); + md.user_cookie_len = nla_len(a); + break; + } + } + + psample_group.net = ovs_dp_get_net(dp); + md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex; + md.trunc_size = skb->len - OVS_CB(skb)->cutlen; + + psample_sample_packet(&psample_group, skb, 0, &md); +} +#else +static void execute_psample(struct datapath *dp, struct sk_buff *skb, + const struct nlattr *attr) +{} +#endif + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1502,6 +1541,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, ovs_kfree_skb_reason(skb, reason); return 0; } + + case OVS_ACTION_ATTR_PSAMPLE: + execute_psample(dp, skb, a); + OVS_CB(skb)->cutlen = 0; + if (nla_is_last(a, rem)) { + consume_skb(skb); + return 0; + } + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f224d9bcea5e..c92bdc4dfe19 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_DROP: + case OVS_ACTION_ATTR_PSAMPLE: break; case OVS_ACTION_ATTR_CT: @@ -2409,7 +2410,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) /* Whenever new actions are added, the need to update this * function should be considered. */ - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25); if (!actions) return; @@ -3157,6 +3158,28 @@ static int validate_and_copy_check_pkt_len(struct net *net, return 0; } +static int validate_psample(const struct nlattr *attr) +{ + static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = { + [OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 }, + [OVS_PSAMPLE_ATTR_COOKIE] = { + .type = NLA_BINARY, + .len = OVS_PSAMPLE_COOKIE_MAX_SIZE, + }, + }; + struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1]; + int err; + + if (!IS_ENABLED(CONFIG_PSAMPLE)) + return -EOPNOTSUPP; + + err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL); + if (err) + return err; + + return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL; +} + static int copy_action(const struct nlattr *from, struct sw_flow_actions **sfa, bool log) { @@ -3212,6 +3235,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, [OVS_ACTION_ATTR_DROP] = sizeof(u32), + [OVS_ACTION_ATTR_PSAMPLE] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3490,6 +3514,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, return -EINVAL; break; + case OVS_ACTION_ATTR_PSAMPLE: + err = validate_psample(a); + if (err) + return err; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; -- cgit v1.2.3 From 71763d8a8203c28178d7be7f18af73d4dddb36ba Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:57 +0200 Subject: net: openvswitch: store sampling probability in cb. When a packet sample is observed, the sampling rate that was used is important to estimate the real frequency of such event. Store the probability of the parent sample action in the skb's cb area and use it in psample action to pass it down to psample module. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ilya Maximets Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-7-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 3 ++- net/openvswitch/actions.c | 20 +++++++++++++++++--- net/openvswitch/datapath.h | 3 +++ net/openvswitch/vport.c | 1 + 4 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3dd653748725..3a701bd1f31b 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -649,7 +649,8 @@ enum ovs_flow_attr { * Actions are passed as nested attributes. * * Executes the specified actions with the given probability on a per-packet - * basis. + * basis. Nested actions will be able to access the probability value of the + * parent @OVS_ACTION_ATTR_SAMPLE. */ enum ovs_sample_attr { OVS_SAMPLE_ATTR_UNSPEC, diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 892d7e48fc5b..101f9a23792c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1048,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb, struct nlattr *sample_arg; int rem = nla_len(attr); const struct sample_arg *arg; + u32 init_probability; bool clone_flow_key; + int err; /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ sample_arg = nla_data(attr); arg = nla_data(sample_arg); actions = nla_next(sample_arg, &rem); + init_probability = OVS_CB(skb)->probability; if ((arg->probability != U32_MAX) && (!arg->probability || get_random_u32() > arg->probability)) { @@ -1062,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb, return 0; } + OVS_CB(skb)->probability = arg->probability; + clone_flow_key = !arg->exec; - return clone_execute(dp, skb, key, 0, actions, rem, last, - clone_flow_key); + err = clone_execute(dp, skb, key, 0, actions, rem, last, + clone_flow_key); + + if (!last) + OVS_CB(skb)->probability = init_probability; + + return err; } /* When 'last' is true, clone() should always consume the 'skb'. @@ -1311,6 +1321,7 @@ static void execute_psample(struct datapath *dp, struct sk_buff *skb, struct psample_group psample_group = {}; struct psample_metadata md = {}; const struct nlattr *a; + u32 rate; int rem; nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { @@ -1329,8 +1340,11 @@ static void execute_psample(struct datapath *dp, struct sk_buff *skb, psample_group.net = ovs_dp_get_net(dp); md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex; md.trunc_size = skb->len - OVS_CB(skb)->cutlen; + md.rate_as_probability = 1; + + rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX; - psample_sample_packet(&psample_group, skb, 0, &md); + psample_sample_packet(&psample_group, skb, rate, &md); } #else static void execute_psample(struct datapath *dp, struct sk_buff *skb, diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 0cd29971a907..9ca6231ea647 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -115,12 +115,15 @@ struct datapath { * fragmented. * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. + * @probability: The sampling probability that was applied to this skb; 0 means + * no sampling has occurred; U32_MAX means 100% probability. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; u16 acts_origlen; u32 cutlen; + u32 probability; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 972ae01a70f7..8732f6e51ae5 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->mru = 0; OVS_CB(skb)->cutlen = 0; + OVS_CB(skb)->probability = 0; if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { u32 mark; -- cgit v1.2.3 From e46296002113b4556baffd5dc68c4f9e22dae13a Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:11:56 +0200 Subject: net: ethtool: pse-pd: Expand C33 PSE status with class, power and extended state This update expands the status information provided by ethtool for PSE c33. It includes details such as the detected class, current power delivered, and extended state information. Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-1-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 37 ++++++ include/linux/ethtool.h | 15 +++ include/linux/pse-pd/pse.h | 8 ++ include/uapi/linux/ethtool.h | 191 +++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 4 + net/ethtool/pse-pd.c | 32 ++++- 6 files changed, 286 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index bfe2eda8580d..0656ad4be000 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1740,6 +1740,13 @@ Kernel response contents: PSE functions. ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the PoE PSE. + ``ETHTOOL_A_C33_PSE_PW_CLASS`` u32 power class of the PoE PSE. + ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` u32 actual power drawn on the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_STATE`` u32 power extended state of the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` u32 power extended substatus of + the PoE PSE. ====================================== ====== ============================= When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` attribute identifies @@ -1772,6 +1779,36 @@ The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_PW_D_STATUS`` implementing .. kernel-doc:: include/uapi/linux/ethtool.h :identifiers: ethtool_c33_pse_pw_d_status +When set, the optional ``ETHTOOL_A_C33_PSE_PW_CLASS`` attribute identifies +the power class of the C33 PSE. It depends on the class negotiated between +the PSE and the PD. This option is corresponding to ``IEEE 802.3-2022`` +30.9.1.1.8 aPSEPowerClassification. + +When set, the optional ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` attribute identifies +This option is corresponding to ``IEEE 802.3-2022`` 30.9.1.1.23 aPSEActualPower. +Actual power is reported in mW. + +When set, the optional ``ETHTOOL_A_C33_PSE_EXT_STATE`` attribute identifies +the extended error state of the C33 PSE. Possible values are: + +.. kernel-doc:: include/uapi/linux/ethtool.h + :identifiers: ethtool_c33_pse_ext_state + +When set, the optional ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` attribute identifies +the extended error state of the C33 PSE. Possible values are: +Possible values are: + +.. kernel-doc:: include/uapi/linux/ethtool.h + :identifiers: ethtool_c33_pse_ext_substate_class_num_events + ethtool_c33_pse_ext_substate_error_condition + ethtool_c33_pse_ext_substate_mr_pse_enable + ethtool_c33_pse_ext_substate_option_detect_ted + ethtool_c33_pse_ext_substate_option_vport_lim + ethtool_c33_pse_ext_substate_ovld_detected + ethtool_c33_pse_ext_substate_pd_dll_power_type + ethtool_c33_pse_ext_substate_power_not_available + ethtool_c33_pse_ext_substate_short_detected + PSE_SET ======= diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3a99238ef895..3e70f5d9e0bb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1273,4 +1273,19 @@ struct ethtool_forced_speed_map { void ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); + +/* C33 PSE extended state and substate. */ +struct ethtool_c33_pse_ext_state_info { + enum ethtool_c33_pse_ext_state c33_pse_ext_state; + union { + enum ethtool_c33_pse_ext_substate_error_condition error_condition; + enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable; + enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted; + enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim; + enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected; + enum ethtool_c33_pse_ext_substate_power_not_available power_not_available; + enum ethtool_c33_pse_ext_substate_short_detected short_detected; + u32 __c33_pse_ext_substate; + }; +}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6eec24ffa866..38b9308e5e7a 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -36,12 +36,20 @@ struct pse_control_config { * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState * @c33_pw_status: power detection status of the PSE. * IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus: + * @c33_pw_class: detected class of a powered PD + * IEEE 802.3-2022 30.9.1.1.8 aPSEPowerClassification + * @c33_actual_pw: power currently delivered by the PSE in mW + * IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower + * @c33_ext_state_info: extended state information of the PSE */ struct pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; enum ethtool_c33_pse_admin_state c33_admin_state; enum ethtool_c33_pse_pw_d_status c33_pw_status; + u32 c33_pw_class; + u32 c33_actual_pw; + struct ethtool_c33_pse_ext_state_info c33_ext_state_info; }; /** diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e011384c915c..230110b97029 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -752,6 +752,197 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_c33_pse_ext_state - groups of PSE extended states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION: Group of error_condition states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID: Group of mr_mps_valid states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE: Group of mr_pse_enable states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED: Group of option_detect_ted + * states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM: Group of option_vport_lim states + * @ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED: Group of ovld_detected states + * @ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE: Group of pd_dll_power_type + * states + * @ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE: Group of power_not_available + * states + * @ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED: Group of short_detected states + */ +enum ethtool_c33_pse_ext_state { + ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION = 1, + ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID, + ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM, + ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED, + ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE, + ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE, + ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_mps_valid - mr_mps_valid states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD: Underload + * state + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN: Port is not + * connected + * + * The PSE monitors either the DC or AC Maintain Power Signature + * (MPS, see 33.2.9.1). This variable indicates the presence or absence of + * a valid MPS. + */ +enum ethtool_c33_pse_ext_substate_mr_mps_valid { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN, +}; + +/** + * enum ethtool_c33_pse_ext_substate_error_condition - error_condition states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT: Non-existing + * port number + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT: Undefined port + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT: Internal + * hardware fault + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON: + * Communication error after force on + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS: Unknown + * port status + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF: Host + * crash turn off + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN: + * Host crash force shutdown + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE: Configuration + * change + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP: Over + * temperature detected + * + * error_condition is a variable indicating the status of + * implementation-specific fault conditions or optionally other system faults + * that prevent the PSE from meeting the specifications in Table 33–11 and that + * require the PSE not to source power. These error conditions are different + * from those monitored by the state diagrams in Figure 33–10. + */ +enum ethtool_c33_pse_ext_substate_error_condition { + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_pse_enable - mr_pse_enable states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE: Disable + * pin active + * + * mr_pse_enable is control variable that selects PSE operation and test + * functions. + */ +enum ethtool_c33_pse_ext_substate_mr_pse_enable { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_detect_ted - option_detect_ted + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS: Detection + * in process + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR: + * Connection check error + * + * option_detect_ted is a variable indicating if detection can be performed + * by the PSE during the ted_timer interval. + */ +enum ethtool_c33_pse_ext_substate_option_detect_ted { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_vport_lim - option_vport_lim states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE: Main supply + * voltage is high + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE: Main supply + * voltage is low + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION: Voltage + * injection into the port + * + * option_vport_lim is an optional variable indicates if VPSE is out of the + * operating range during normal operating state. + */ +enum ethtool_c33_pse_ext_substate_option_vport_lim { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION, +}; + +/** + * enum ethtool_c33_pse_ext_substate_ovld_detected - ovld_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD: Overload state + * + * ovld_detected is a variable indicating if the PSE output current has been + * in an overload condition (see 33.2.7.6) for at least TCUT of a one-second + * sliding time. + */ +enum ethtool_c33_pse_ext_substate_ovld_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_power_not_available - power_not_available + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED: Power + * budget exceeded for the controller + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET: + * Configured port power limit exceeded controller power budget + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT: + * Power request from PD exceeds port limit + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT: Power + * denied due to Hardware power limit + * + * power_not_available is a variable that is asserted in an + * implementation-dependent manner when the PSE is no longer capable of + * sourcing sufficient power to support the attached PD. Sufficient power + * is defined by classification; see 33.2.6. + */ +enum ethtool_c33_pse_ext_substate_power_not_available { + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT, +}; + +/** + * enum ethtool_c33_pse_ext_substate_short_detected - short_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION: Short + * condition was detected + * + * short_detected is a variable indicating if the PSE output current has been + * in a short circuit condition for TLIM within a sliding window (see 33.2.7.7). + */ +enum ethtool_c33_pse_ext_substate_short_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION = 1, +}; + /** * enum ethtool_pse_types - Types of PSE controller. * @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 840dabdc9d88..b8895da001bc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -939,6 +939,10 @@ enum { ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ + ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2c981d443f27..d2a1c14d789f 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -86,7 +86,16 @@ static int pse_reply_size(const struct ethnl_req_info *req_base, len += nla_total_size(sizeof(u32)); /* _C33_PSE_ADMIN_STATE */ if (st->c33_pw_status > 0) len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_D_STATUS */ - + if (st->c33_pw_class > 0) + len += nla_total_size(sizeof(u32)); /* _C33_PSE_PW_CLASS */ + if (st->c33_actual_pw > 0) + len += nla_total_size(sizeof(u32)); /* _C33_PSE_ACTUAL_PW */ + if (st->c33_ext_state_info.c33_pse_ext_state > 0) { + len += nla_total_size(sizeof(u32)); /* _C33_PSE_EXT_STATE */ + if (st->c33_ext_state_info.__c33_pse_ext_substate > 0) + /* _C33_PSE_EXT_SUBSTATE */ + len += nla_total_size(sizeof(u32)); + } return len; } @@ -117,6 +126,27 @@ static int pse_fill_reply(struct sk_buff *skb, st->c33_pw_status)) return -EMSGSIZE; + if (st->c33_pw_class > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_CLASS, + st->c33_pw_class)) + return -EMSGSIZE; + + if (st->c33_actual_pw > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_ACTUAL_PW, + st->c33_actual_pw)) + return -EMSGSIZE; + + if (st->c33_ext_state_info.c33_pse_ext_state > 0) { + if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_STATE, + st->c33_ext_state_info.c33_pse_ext_state)) + return -EMSGSIZE; + + if (st->c33_ext_state_info.__c33_pse_ext_substate > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_EXT_SUBSTATE, + st->c33_ext_state_info.__c33_pse_ext_substate)) + return -EMSGSIZE; + } + return 0; } -- cgit v1.2.3 From 4a83abcef5f4f36be4e04ba18edd64226f6f4a19 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:11:59 +0200 Subject: net: pse-pd: Add new power limit get and set c33 features This patch add a way to get and set the power limit of a PSE PI. For that it uses regulator API callbacks wrapper like get_voltage() and get/set_current_limit() as power is simply V * I. We used mW unit as defined by the IEEE 802.3-2022 standards. set_current_limit() uses the voltage return by get_voltage() and the desired power limit to calculate the current limit. get_voltage() callback is then mandatory to set the power limit. get_current_limit() callback is by default looking at a driver callback and fallback to extracting the current limit from _pse_ethtool_get_status() if the driver does not set its callback. We prefer let the user the choice because ethtool_get_status return much more information than the current limit. expand pse status with c33_pw_limit_ranges to return the ranges available to configure the power limit. Reviewed-by: Sai Krishna Acked-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-4-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/pse-pd/pse_core.c | 172 +++++++++++++++++++++++++++++++++++++++--- include/linux/pse-pd/pse.h | 43 +++++++++++ 2 files changed, 204 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 795ab264eaf2..16f364b3473b 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -265,10 +265,113 @@ static int pse_pi_disable(struct regulator_dev *rdev) return ret; } +static int _pse_pi_get_voltage(struct regulator_dev *rdev) +{ + struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev); + const struct pse_controller_ops *ops; + int id; + + ops = pcdev->ops; + if (!ops->pi_get_voltage) + return -EOPNOTSUPP; + + id = rdev_get_id(rdev); + return ops->pi_get_voltage(pcdev, id); +} + +static int pse_pi_get_voltage(struct regulator_dev *rdev) +{ + struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev); + int ret; + + mutex_lock(&pcdev->lock); + ret = _pse_pi_get_voltage(rdev); + mutex_unlock(&pcdev->lock); + + return ret; +} + +static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev, + int id, + struct netlink_ext_ack *extack, + struct pse_control_status *status); + +static int pse_pi_get_current_limit(struct regulator_dev *rdev) +{ + struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev); + const struct pse_controller_ops *ops; + struct netlink_ext_ack extack = {}; + struct pse_control_status st = {}; + int id, uV, ret; + s64 tmp_64; + + ops = pcdev->ops; + id = rdev_get_id(rdev); + mutex_lock(&pcdev->lock); + if (ops->pi_get_current_limit) { + ret = ops->pi_get_current_limit(pcdev, id); + goto out; + } + + /* If pi_get_current_limit() callback not populated get voltage + * from pi_get_voltage() and power limit from ethtool_get_status() + * to calculate current limit. + */ + ret = _pse_pi_get_voltage(rdev); + if (!ret) { + dev_err(pcdev->dev, "Voltage null\n"); + ret = -ERANGE; + goto out; + } + if (ret < 0) + goto out; + uV = ret; + + ret = _pse_ethtool_get_status(pcdev, id, &extack, &st); + if (ret) + goto out; + + if (!st.c33_avail_pw_limit) { + ret = -ENODATA; + goto out; + } + + tmp_64 = st.c33_avail_pw_limit; + tmp_64 *= 1000000000ull; + /* uA = mW * 1000000000 / uV */ + ret = DIV_ROUND_CLOSEST_ULL(tmp_64, uV); + +out: + mutex_unlock(&pcdev->lock); + return ret; +} + +static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA, + int max_uA) +{ + struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev); + const struct pse_controller_ops *ops; + int id, ret; + + ops = pcdev->ops; + if (!ops->pi_set_current_limit) + return -EOPNOTSUPP; + + id = rdev_get_id(rdev); + mutex_lock(&pcdev->lock); + ret = ops->pi_set_current_limit(pcdev, id, max_uA); + mutex_unlock(&pcdev->lock); + + return ret; +} + static const struct regulator_ops pse_pi_ops = { .is_enabled = pse_pi_is_enabled, .enable = pse_pi_enable, .disable = pse_pi_disable, + .get_voltage = pse_pi_get_voltage, + .get_current_limit = pse_pi_get_current_limit, + .set_current_limit = pse_pi_set_current_limit, }; static int @@ -298,7 +401,9 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev, rdesc->ops = &pse_pi_ops; rdesc->owner = pcdev->owner; - rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS; + rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS | + REGULATOR_CHANGE_CURRENT; + rinit_data->constraints.max_uA = MAX_PI_CURRENT; rinit_data->supply_regulator = "vpwr"; rconfig.dev = pcdev->dev; @@ -626,6 +731,23 @@ out: } EXPORT_SYMBOL_GPL(of_pse_control_get); +static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev, + int id, + struct netlink_ext_ack *extack, + struct pse_control_status *status) +{ + const struct pse_controller_ops *ops; + + ops = pcdev->ops; + if (!ops->ethtool_get_status) { + NL_SET_ERR_MSG(extack, + "PSE driver does not support status report"); + return -EOPNOTSUPP; + } + + return ops->ethtool_get_status(pcdev, id, extack, status); +} + /** * pse_ethtool_get_status - get status of PSE control * @psec: PSE control pointer @@ -638,19 +760,10 @@ int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, struct pse_control_status *status) { - const struct pse_controller_ops *ops; int err; - ops = psec->pcdev->ops; - - if (!ops->ethtool_get_status) { - NL_SET_ERR_MSG(extack, - "PSE driver does not support status report"); - return -EOPNOTSUPP; - } - mutex_lock(&psec->pcdev->lock); - err = ops->ethtool_get_status(psec->pcdev, psec->id, extack, status); + err = _pse_ethtool_get_status(psec->pcdev, psec->id, extack, status); mutex_unlock(&psec->pcdev->lock); return err; @@ -732,6 +845,43 @@ int pse_ethtool_set_config(struct pse_control *psec, } EXPORT_SYMBOL_GPL(pse_ethtool_set_config); +/** + * pse_ethtool_set_pw_limit - set PSE control power limit + * @psec: PSE control pointer + * @extack: extack for reporting useful error messages + * @pw_limit: power limit value in mW + * + * Return: 0 on success and failure value on error + */ +int pse_ethtool_set_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack, + const unsigned int pw_limit) +{ + int uV, uA, ret; + s64 tmp_64; + + ret = regulator_get_voltage(psec->ps); + if (!ret) { + NL_SET_ERR_MSG(extack, + "Can't calculate the current, PSE voltage read is 0"); + return -ERANGE; + } + if (ret < 0) { + NL_SET_ERR_MSG(extack, + "Error reading PSE voltage"); + return ret; + } + uV = ret; + + tmp_64 = pw_limit; + tmp_64 *= 1000000000ull; + /* uA = mW * 1000000000 / uV */ + uA = DIV_ROUND_CLOSEST_ULL(tmp_64, uV); + + return regulator_set_current_limit(psec->ps, 0, uA); +} +EXPORT_SYMBOL_GPL(pse_ethtool_set_pw_limit); + bool pse_has_podl(struct pse_control *psec) { return psec->pcdev->types & ETHTOOL_PSE_PODL; diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 38b9308e5e7a..591a53e082e6 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -9,6 +9,9 @@ #include #include +/* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */ +#define MAX_PI_CURRENT 1920000 + struct phy_device; struct pse_controller_dev; @@ -41,6 +44,12 @@ struct pse_control_config { * @c33_actual_pw: power currently delivered by the PSE in mW * IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower * @c33_ext_state_info: extended state information of the PSE + * @c33_avail_pw_limit: available power limit of the PSE in mW + * IEEE 802.3-2022 145.2.5.4 pse_avail_pwr + * @c33_pw_limit_ranges: supported power limit configuration range. The driver + * is in charge of the memory allocation. + * @c33_pw_limit_nb_ranges: number of supported power limit configuration + * ranges */ struct pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; @@ -50,6 +59,9 @@ struct pse_control_status { u32 c33_pw_class; u32 c33_actual_pw; struct ethtool_c33_pse_ext_state_info c33_ext_state_info; + u32 c33_avail_pw_limit; + struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges; + u32 c33_pw_limit_nb_ranges; }; /** @@ -61,6 +73,14 @@ struct pse_control_status { * May also return negative errno. * @pi_enable: Configure the PSE PI as enabled. * @pi_disable: Configure the PSE PI as disabled. + * @pi_get_voltage: Return voltage similarly to get_voltage regulator + * callback. + * @pi_get_current_limit: Get the configured current limit similarly to + * get_current_limit regulator callback. + * @pi_set_current_limit: Configure the current limit similarly to + * set_current_limit regulator callback. + * Should not return an error in case of MAX_PI_CURRENT + * current value set. */ struct pse_controller_ops { int (*ethtool_get_status)(struct pse_controller_dev *pcdev, @@ -70,6 +90,11 @@ struct pse_controller_ops { int (*pi_is_enabled)(struct pse_controller_dev *pcdev, int id); int (*pi_enable)(struct pse_controller_dev *pcdev, int id); int (*pi_disable)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_current_limit)(struct pse_controller_dev *pcdev, + int id); + int (*pi_set_current_limit)(struct pse_controller_dev *pcdev, + int id, int max_uA); }; struct module; @@ -156,6 +181,11 @@ int pse_ethtool_get_status(struct pse_control *psec, int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config); +int pse_ethtool_set_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack, + const unsigned int pw_limit); +int pse_ethtool_get_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack); bool pse_has_podl(struct pse_control *psec); bool pse_has_c33(struct pse_control *psec); @@ -185,6 +215,19 @@ static inline int pse_ethtool_set_config(struct pse_control *psec, return -EOPNOTSUPP; } +static inline int pse_ethtool_set_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack, + const unsigned int pw_limit) +{ + return -EOPNOTSUPP; +} + +static inline int pse_ethtool_get_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline bool pse_has_podl(struct pse_control *psec) { return false; -- cgit v1.2.3 From 30d7b6727724ce3729f2cb5b8be985d2d1931d2b Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:12:00 +0200 Subject: net: ethtool: Add new power limit get and set features This patch expands the status information provided by ethtool for PSE c33 with available power limit and available power limit ranges. It also adds a call to pse_ethtool_set_pw_limit() to configure the PSE control power limit. Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-5-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 64 ++++++++++++++------ include/linux/ethtool.h | 5 ++ include/uapi/linux/ethtool_netlink.h | 8 +++ net/ethtool/pse-pd.c | 89 +++++++++++++++++++++++++--- 4 files changed, 141 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 0656ad4be000..3ab423b80e91 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1730,24 +1730,28 @@ Request contents: Kernel response contents: - ====================================== ====== ============================= - ``ETHTOOL_A_PSE_HEADER`` nested reply header - ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` u32 Operational state of the PoDL - PSE functions - ``ETHTOOL_A_PODL_PSE_PW_D_STATUS`` u32 power detection status of the - PoDL PSE. - ``ETHTOOL_A_C33_PSE_ADMIN_STATE`` u32 Operational state of the PoE - PSE functions. - ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the - PoE PSE. - ``ETHTOOL_A_C33_PSE_PW_CLASS`` u32 power class of the PoE PSE. - ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` u32 actual power drawn on the - PoE PSE. - ``ETHTOOL_A_C33_PSE_EXT_STATE`` u32 power extended state of the - PoE PSE. - ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` u32 power extended substatus of - the PoE PSE. - ====================================== ====== ============================= + ========================================== ====== ============================= + ``ETHTOOL_A_PSE_HEADER`` nested reply header + ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` u32 Operational state of the PoDL + PSE functions + ``ETHTOOL_A_PODL_PSE_PW_D_STATUS`` u32 power detection status of the + PoDL PSE. + ``ETHTOOL_A_C33_PSE_ADMIN_STATE`` u32 Operational state of the PoE + PSE functions. + ``ETHTOOL_A_C33_PSE_PW_D_STATUS`` u32 power detection status of the + PoE PSE. + ``ETHTOOL_A_C33_PSE_PW_CLASS`` u32 power class of the PoE PSE. + ``ETHTOOL_A_C33_PSE_ACTUAL_PW`` u32 actual power drawn on the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_STATE`` u32 power extended state of the + PoE PSE. + ``ETHTOOL_A_C33_PSE_EXT_SUBSTATE`` u32 power extended substatus of + the PoE PSE. + ``ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT`` u32 currently configured power + limit of the PoE PSE. + ``ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES`` nested Supported power limit + configuration ranges. + ========================================== ====== ============================= When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` attribute identifies the operational state of the PoDL PSE functions. The operational state of the @@ -1809,6 +1813,16 @@ Possible values are: ethtool_c33_pse_ext_substate_power_not_available ethtool_c33_pse_ext_substate_short_detected +When set, the optional ``ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT`` attribute +identifies the C33 PSE power limit in mW. + +When set the optional ``ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES`` nested attribute +identifies the C33 PSE power limit ranges through +``ETHTOOL_A_C33_PSE_PWR_VAL_LIMIT_RANGE_MIN`` and +``ETHTOOL_A_C33_PSE_PWR_VAL_LIMIT_RANGE_MAX``. +If the controller works with fixed classes, the min and max values will be +equal. + PSE_SET ======= @@ -1820,6 +1834,8 @@ Request contents: ``ETHTOOL_A_PSE_HEADER`` nested request header ``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL`` u32 Control PoDL PSE Admin state ``ETHTOOL_A_C33_PSE_ADMIN_CONTROL`` u32 Control PSE Admin state + ``ETHTOOL_A_C33_PSE_AVAIL_PWR_LIMIT`` u32 Control PoE PSE available + power limit ====================================== ====== ============================= When set, the optional ``ETHTOOL_A_PODL_PSE_ADMIN_CONTROL`` attribute is used @@ -1830,6 +1846,18 @@ to control PoDL PSE Admin functions. This option is implementing The same goes for ``ETHTOOL_A_C33_PSE_ADMIN_CONTROL`` implementing ``IEEE 802.3-2022`` 30.9.1.2.1 acPSEAdminControl. +When set, the optional ``ETHTOOL_A_C33_PSE_AVAIL_PWR_LIMIT`` attribute is +used to control the available power value limit for C33 PSE in milliwatts. +This attribute corresponds to the `pse_available_power` variable described in +``IEEE 802.3-2022`` 33.2.4.4 Variables and `pse_avail_pwr` in 145.2.5.4 +Variables, which are described in power classes. + +It was decided to use milliwatts for this interface to unify it with other +power monitoring interfaces, which also use milliwatts, and to align with +various existing products that document power consumption in watts rather than +classes. If power limit configuration based on classes is needed, the +conversion can be done in user space, for example by ethtool. + RSS_GET ======= diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3e70f5d9e0bb..e213b5508da6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1288,4 +1288,9 @@ struct ethtool_c33_pse_ext_state_info { u32 __c33_pse_ext_substate; }; }; + +struct ethtool_c33_pse_pw_limit_range { + u32 min; + u32 max; +}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b8895da001bc..6d5bdcc67631 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -930,6 +930,12 @@ enum { }; /* Power Sourcing Equipment */ +enum { + ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ +}; + enum { ETHTOOL_A_PSE_UNSPEC, ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ @@ -943,6 +949,8 @@ enum { ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index d2a1c14d789f..ba46c9c8b12d 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -96,9 +96,46 @@ static int pse_reply_size(const struct ethnl_req_info *req_base, /* _C33_PSE_EXT_SUBSTATE */ len += nla_total_size(sizeof(u32)); } + if (st->c33_avail_pw_limit > 0) + /* _C33_AVAIL_PSE_PW_LIMIT */ + len += nla_total_size(sizeof(u32)); + if (st->c33_pw_limit_nb_ranges > 0) + /* _C33_PSE_PW_LIMIT_RANGES */ + len += st->c33_pw_limit_nb_ranges * + (nla_total_size(0) + + nla_total_size(sizeof(u32)) * 2); + return len; } +static int pse_put_pw_limit_ranges(struct sk_buff *skb, + const struct pse_control_status *st) +{ + const struct ethtool_c33_pse_pw_limit_range *pw_limit_ranges; + int i; + + pw_limit_ranges = st->c33_pw_limit_ranges; + for (i = 0; i < st->c33_pw_limit_nb_ranges; i++) { + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, + pw_limit_ranges->min) || + nla_put_u32(skb, ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, + pw_limit_ranges->max)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + nla_nest_end(skb, nest); + pw_limit_ranges++; + } + + return 0; +} + static int pse_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) @@ -147,9 +184,25 @@ static int pse_fill_reply(struct sk_buff *skb, return -EMSGSIZE; } + if (st->c33_avail_pw_limit > 0 && + nla_put_u32(skb, ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, + st->c33_avail_pw_limit)) + return -EMSGSIZE; + + if (st->c33_pw_limit_nb_ranges > 0 && + pse_put_pw_limit_ranges(skb, st)) + return -EMSGSIZE; + return 0; } +static void pse_cleanup_data(struct ethnl_reply_data *reply_base) +{ + const struct pse_reply_data *data = PSE_REPDATA(reply_base); + + kfree(data->status.c33_pw_limit_ranges); +} + /* PSE_SET */ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = { @@ -160,6 +213,7 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = { [ETHTOOL_A_C33_PSE_ADMIN_CONTROL] = NLA_POLICY_RANGE(NLA_U32, ETHTOOL_C33_PSE_ADMIN_STATE_DISABLED, ETHTOOL_C33_PSE_ADMIN_STATE_ENABLED), + [ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT] = { .type = NLA_U32 }, }; static int @@ -202,19 +256,39 @@ static int ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) { struct net_device *dev = req_info->dev; - struct pse_control_config config = {}; struct nlattr **tb = info->attrs; struct phy_device *phydev; + int ret = 0; phydev = dev->phydev; + + if (tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]) { + unsigned int pw_limit; + + pw_limit = nla_get_u32(tb[ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT]); + ret = pse_ethtool_set_pw_limit(phydev->psec, info->extack, + pw_limit); + if (ret) + return ret; + } + /* These values are already validated by the ethnl_pse_set_policy */ - if (pse_has_podl(phydev->psec)) - config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]); - if (pse_has_c33(phydev->psec)) - config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]); + if (tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL] || + tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]) { + struct pse_control_config config = {}; + + if (pse_has_podl(phydev->psec)) + config.podl_admin_control = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]); + if (pse_has_c33(phydev->psec)) + config.c33_admin_control = nla_get_u32(tb[ETHTOOL_A_C33_PSE_ADMIN_CONTROL]); + + ret = pse_ethtool_set_config(phydev->psec, info->extack, + &config); + if (ret) + return ret; + } - /* Return errno directly - PSE has no notification */ - return pse_ethtool_set_config(phydev->psec, info->extack, &config); + return ret; } const struct ethnl_request_ops ethnl_pse_request_ops = { @@ -227,6 +301,7 @@ const struct ethnl_request_ops ethnl_pse_request_ops = { .prepare_data = pse_prepare_data, .reply_size = pse_reply_size, .fill_reply = pse_fill_reply, + .cleanup_data = pse_cleanup_data, .set_validate = ethnl_set_pse_validate, .set = ethnl_set_pse, -- cgit v1.2.3 From e6c06ca8f21d1cdb444c708e385d86a54bc5fc60 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:30 +0200 Subject: wifi: cfg80211: add support for advertising multiple radios belonging to a wiphy The prerequisite for MLO support in cfg80211/mac80211 is that all the links participating in MLO must be from the same wiphy/ieee80211_hw. To meet this expectation, some drivers may need to group multiple discrete hardware each acting as a link in MLO under single wiphy. With this change, supported frequencies and interface combinations of each individual radio are reported to user space. This allows user space to figure out the limitations of what combination of channels can be used concurrently. Even for non-MLO devices, this improves support for devices capable of running on multiple channels at the same time. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/18a88f9ce82b1c9f7c12f1672430eaf2bb0be295.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 42 +++++++++++++++- include/uapi/linux/nl80211.h | 65 +++++++++++++++++++++++++ net/wireless/nl80211.c | 113 ++++++++++++++++++++++++++++++++++++++----- 3 files changed, 208 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6f992aff74ae..a00cf80e61dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5046,7 +5046,9 @@ struct ieee80211_iface_limit { * struct ieee80211_iface_combination - possible interface combination * * With this structure the driver can describe which interface - * combinations it supports concurrently. + * combinations it supports concurrently. When set in a struct wiphy_radio, + * the combinations refer to combinations of interfaces currently active on + * that radio. * * Examples: * @@ -5406,6 +5408,38 @@ struct wiphy_iftype_akm_suites { int n_akm_suites; }; +/** + * struct wiphy_radio_freq_range - wiphy frequency range + * @start_freq: start range edge frequency (kHz) + * @end_freq: end range edge frequency (kHz) + */ +struct wiphy_radio_freq_range { + u32 start_freq; + u32 end_freq; +}; + + +/** + * struct wiphy_radio - physical radio of a wiphy + * This structure describes a physical radio belonging to a wiphy. + * It is used to describe concurrent-channel capabilities. Only one channel + * can be active on the radio described by struct wiphy_radio. + * + * @freq_range: frequency range that the radio can operate on. + * @n_freq_range: number of elements in @freq_range + * + * @iface_combinations: Valid interface combinations array, should not + * list single interface types. + * @n_iface_combinations: number of entries in @iface_combinations array. + */ +struct wiphy_radio { + const struct wiphy_radio_freq_range *freq_range; + int n_freq_range; + + const struct ieee80211_iface_combination *iface_combinations; + int n_iface_combinations; +}; + #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff /** @@ -5624,6 +5658,9 @@ struct wiphy_iftype_akm_suites { * A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver * supports enabling HW timestamping for all peers (i.e. no need to * specify a mac address). + * + * @radio: radios belonging to this wiphy + * @n_radio: number of radios */ struct wiphy { struct mutex mtx; @@ -5774,6 +5811,9 @@ struct wiphy { u16 hw_timestamp_max_peers; + int n_radio; + const struct wiphy_radio *radio; + char priv[] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6ae3997061b6..f97f5adc8d51 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2052,6 +2052,10 @@ enum nl80211_commands { * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported * interface combinations. In each nested item, it contains attributes * defined in &enum nl80211_if_combination_attrs. + * If the wiphy uses multiple radios (@NL80211_ATTR_WIPHY_RADIOS is set), + * this attribute contains the interface combinations of the first radio. + * See @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS for the global wiphy + * combinations for the sum of all radios. * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that * are managed in software: interfaces of these types aren't subject to @@ -2856,6 +2860,14 @@ enum nl80211_commands { * %NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs * are used on this connection * + * @NL80211_ATTR_WIPHY_RADIOS: Nested attribute describing physical radios + * belonging to this wiphy. See &enum nl80211_wiphy_radio_attrs. + * + * @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS: Nested attribute listing the + * supported interface combinations for all radios combined. In each + * nested item, it contains attributes defined in + * &enum nl80211_if_combination_attrs. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3401,6 +3413,9 @@ enum nl80211_attrs { NL80211_ATTR_ASSOC_SPP_AMSDU, + NL80211_ATTR_WIPHY_RADIOS, + NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -8005,4 +8020,54 @@ enum nl80211_ap_settings_flags { NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, }; +/** + * enum nl80211_wiphy_radio_attrs - wiphy radio attributes + * + * @__NL80211_WIPHY_RADIO_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_ATTR_INDEX: Index of this radio (u32) + * @NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE: Frequency range supported by this + * radio. Attribute may be present multiple times. + * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface + * combination for this radio. Attribute may be present multiple times + * and contains attributes defined in &enum nl80211_if_combination_attrs. + * + * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_attrs { + __NL80211_WIPHY_RADIO_ATTR_INVALID, + + NL80211_WIPHY_RADIO_ATTR_INDEX, + NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, + NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + + /* keep last */ + __NL80211_WIPHY_RADIO_ATTR_LAST, + NL80211_WIPHY_RADIO_ATTR_MAX = __NL80211_WIPHY_RADIO_ATTR_LAST - 1, +}; + +/** + * enum nl80211_wiphy_radio_freq_range - wiphy radio frequency range + * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_FREQ_ATTR_START: Frequency range start (u32). + * The unit is kHz. + * @NL80211_WIPHY_RADIO_FREQ_ATTR_END: Frequency range end (u32). + * The unit is kHz. + * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_FREQ_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_freq_range { + __NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID, + + NL80211_WIPHY_RADIO_FREQ_ATTR_START, + NL80211_WIPHY_RADIO_FREQ_ATTR_END, + + __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST, + NL80211_WIPHY_RADIO_FREQ_ATTR_MAX = __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index deacd5f3f256..7397a372c78e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1632,16 +1632,18 @@ nla_put_failure: } static int nl80211_put_ifcomb_data(struct sk_buff *msg, bool large, int idx, - const struct ieee80211_iface_combination *c) + const struct ieee80211_iface_combination *c, + u16 nested) { struct nlattr *nl_combi, *nl_limits; int i; - nl_combi = nla_nest_start_noflag(msg, idx); + nl_combi = nla_nest_start_noflag(msg, idx | nested); if (!nl_combi) goto nla_put_failure; - nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS); + nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS | + nested); if (!nl_limits) goto nla_put_failure; @@ -1689,19 +1691,26 @@ nla_put_failure: static int nl80211_put_iface_combinations(struct wiphy *wiphy, struct sk_buff *msg, - bool large) + int attr, int radio, + bool large, u16 nested) { + const struct ieee80211_iface_combination *c; struct nlattr *nl_combis; - int i; + int i, n; - nl_combis = nla_nest_start_noflag(msg, - NL80211_ATTR_INTERFACE_COMBINATIONS); + nl_combis = nla_nest_start_noflag(msg, attr | nested); if (!nl_combis) goto nla_put_failure; - for (i = 0; i < wiphy->n_iface_combinations; i++) - if (nl80211_put_ifcomb_data(msg, large, i + 1, - &wiphy->iface_combinations[i])) + if (radio >= 0) { + c = wiphy->radio[0].iface_combinations; + n = wiphy->radio[0].n_iface_combinations; + } else { + c = wiphy->iface_combinations; + n = wiphy->n_iface_combinations; + } + for (i = 0; i < n; i++) + if (nl80211_put_ifcomb_data(msg, large, i + 1, &c[i], nested)) goto nla_put_failure; nla_nest_end(msg, nl_combis); @@ -2408,6 +2417,80 @@ fail: return -ENOBUFS; } +static int nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx) +{ + const struct wiphy_radio *r = &wiphy->radio[idx]; + struct nlattr *radio, *freq; + int i; + + radio = nla_nest_start(msg, idx); + if (!radio) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx)) + goto nla_put_failure; + + for (i = 0; i < r->n_freq_range; i++) { + const struct wiphy_radio_freq_range *range = &r->freq_range[i]; + + freq = nla_nest_start(msg, NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE); + if (!freq) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_START, + range->start_freq) || + nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_END, + range->end_freq)) + goto nla_put_failure; + + nla_nest_end(msg, freq); + } + + for (i = 0; i < r->n_iface_combinations; i++) + if (nl80211_put_ifcomb_data(msg, true, + NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + &r->iface_combinations[i], + NLA_F_NESTED)) + goto nla_put_failure; + + nla_nest_end(msg, radio); + + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +static int nl80211_put_radios(struct wiphy *wiphy, struct sk_buff *msg) +{ + struct nlattr *radios; + int i; + + if (!wiphy->n_radio) + return 0; + + radios = nla_nest_start(msg, NL80211_ATTR_WIPHY_RADIOS); + if (!radios) + return -ENOBUFS; + + for (i = 0; i < wiphy->n_radio; i++) + if (nl80211_put_radio(wiphy, msg, i)) + goto fail; + + nla_nest_end(msg, radios); + + if (nl80211_put_iface_combinations(wiphy, msg, + NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + -1, true, NLA_F_NESTED)) + return -ENOBUFS; + + return 0; + +fail: + nla_nest_cancel(msg, radios); + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2703,7 +2786,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; if (nl80211_put_iface_combinations(&rdev->wiphy, msg, - state->split)) + NL80211_ATTR_INTERFACE_COMBINATIONS, + rdev->wiphy.n_radio ? 0 : -1, + state->split, 0)) goto nla_put_failure; state->split_start++; @@ -3017,6 +3102,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.hw_timestamp_max_peers)) goto nla_put_failure; + state->split_start++; + break; + case 17: + if (nl80211_put_radios(&rdev->wiphy, msg)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; -- cgit v1.2.3 From abb4cfe3661aa05426916b21164f88ca5a405a3a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:31 +0200 Subject: wifi: cfg80211: extend interface combination check for multi-radio Add a field in struct iface_combination_params to check per-radio interface combinations instead of per-wiphy ones. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/32b28da89c2d759b0324deeefe2be4cee91de18e.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ net/mac80211/util.c | 5 ++++- net/wireless/rdev-ops.h | 12 ++++++++++++ net/wireless/util.c | 33 ++++++++++++++++++++++++++------- 4 files changed, 47 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a00cf80e61dc..4767e2c76b01 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1598,6 +1598,7 @@ struct cfg80211_color_change_settings { * * Used to pass interface combination parameters * + * @radio_idx: wiphy radio index or -1 for global * @num_different_channels: the number of different channels we want * to use for verification * @radar_detect: a bitmap where each bit corresponds to a channel @@ -1611,6 +1612,7 @@ struct cfg80211_color_change_settings { * the verification */ struct iface_combination_params { + int radio_idx; int num_different_channels; u8 radar_detect; int iftype_num[NUM_NL80211_IFTYPES]; @@ -4580,6 +4582,8 @@ struct mgmt_frame_regs { * * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. * @set_ttlm: set the TID to link mapping. + * @get_radio_mask: get bitmask of radios in use. + * (invoked with the wiphy mutex held) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4941,6 +4945,7 @@ struct cfg80211_ops { struct cfg80211_set_hw_timestamp *hwts); int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params); + u32 (*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev); }; /* diff --git a/net/mac80211/util.c b/net/mac80211/util.c index c6d5f73119d8..27f0db2e9796 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3944,6 +3944,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, int total = 1; struct iface_combination_params params = { .radar_detect = radar_detect, + .radio_idx = -1, }; lockdep_assert_wiphy(local->hw.wiphy); @@ -4034,7 +4035,9 @@ int ieee80211_max_num_channels(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; u32 max_num_different_channels = 1; int err; - struct iface_combination_params params = {0}; + struct iface_combination_params params = { + .radio_idx = -1, + }; lockdep_assert_wiphy(local->hw.wiphy); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 8f15658002ee..ec3f4aa1c807 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1532,4 +1532,16 @@ rdev_set_ttlm(struct cfg80211_registered_device *rdev, return ret; } + +static inline u32 +rdev_get_radio_mask(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wiphy *wiphy = &rdev->wiphy; + + if (!rdev->ops->get_radio_mask) + return 0; + + return rdev->ops->get_radio_mask(wiphy, dev); +} #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/util.c b/net/wireless/util.c index af6ec719567f..2492f259621f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2307,13 +2307,16 @@ static int cfg80211_wdev_bi(struct wireless_dev *wdev) static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, u32 *beacon_int_gcd, - bool *beacon_int_different) + bool *beacon_int_different, + int radio_idx) { + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; *beacon_int_gcd = 0; *beacon_int_different = false; + rdev = wiphy_to_rdev(wiphy); list_for_each_entry(wdev, &wiphy->wdev_list, list) { int wdev_bi; @@ -2321,6 +2324,11 @@ static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, if (wdev->valid_links) continue; + /* skip wdevs not active on the given wiphy radio */ + if (radio_idx >= 0 && + !(rdev_get_radio_mask(rdev, wdev->netdev) & BIT(radio_idx))) + continue; + wdev_bi = cfg80211_wdev_bi(wdev); if (!wdev_bi) @@ -2368,14 +2376,19 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, void *data), void *data) { + const struct wiphy_radio *radio = NULL; + const struct ieee80211_iface_combination *c, *cs; const struct ieee80211_regdomain *regdom; enum nl80211_dfs_regions region = 0; - int i, j, iftype; + int i, j, n, iftype; int num_interfaces = 0; u32 used_iftypes = 0; u32 beacon_int_gcd; bool beacon_int_different; + if (params->radio_idx >= 0) + radio = &wiphy->radio[params->radio_idx]; + /* * This is a bit strange, since the iteration used to rely only on * the data given by the driver, but here it now relies on context, @@ -2387,7 +2400,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, * interfaces (while being brought up) and channel/radar data. */ cfg80211_calculate_bi_data(wiphy, params->new_beacon_int, - &beacon_int_gcd, &beacon_int_different); + &beacon_int_gcd, &beacon_int_different, + params->radio_idx); if (params->radar_detect) { rcu_read_lock(); @@ -2404,13 +2418,18 @@ int cfg80211_iter_combinations(struct wiphy *wiphy, used_iftypes |= BIT(iftype); } - for (i = 0; i < wiphy->n_iface_combinations; i++) { - const struct ieee80211_iface_combination *c; + if (radio) { + cs = radio->iface_combinations; + n = radio->n_iface_combinations; + } else { + cs = wiphy->iface_combinations; + n = wiphy->n_iface_combinations; + } + for (i = 0; i < n; i++) { struct ieee80211_iface_limit *limits; u32 all_iftypes = 0; - c = &wiphy->iface_combinations[i]; - + c = &cs[i]; if (num_interfaces > c->max_interfaces) continue; if (params->num_different_channels > c->num_different_channels) -- cgit v1.2.3 From 417d88189ccf645a157a3dc3da7e894c1f2829d1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 4 Jul 2024 22:25:59 +0200 Subject: sctp: Fix typos and improve comments Fix typos s/steam/stream/ and spell out Schedule/Unschedule in the comments. Compile-tested only. Signed-off-by: Thorsten Blum Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240704202558.62704-2-thorsten.blum@toblux.com Signed-off-by: Paolo Abeni --- include/net/sctp/stream_sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 572d73fdcd5e..8034bf5febbe 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -35,10 +35,10 @@ struct sctp_sched_ops { struct sctp_chunk *(*dequeue)(struct sctp_outq *q); /* Called only if the chunk fit the packet */ void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk); - /* Sched all chunks already enqueued */ - void (*sched_all)(struct sctp_stream *steam); - /* Unched all chunks already enqueued */ - void (*unsched_all)(struct sctp_stream *steam); + /* Schedule all chunks already enqueued */ + void (*sched_all)(struct sctp_stream *stream); + /* Unschedule all chunks already enqueued */ + void (*unsched_all)(struct sctp_stream *stream); }; int sctp_sched_set_sched(struct sctp_association *asoc, -- cgit v1.2.3 From 510dba80ed669d6123901ccf0476706122b008b1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:32 +0200 Subject: wifi: cfg80211: add helper for checking if a chandef is valid on a radio Check if the full channel width is in the radio's frequency range. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/7c8ea146feb6f37cee62e5ba6be5370403695797.1720514221.git-series.nbd@nbd.name [add missing Return: documentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ net/wireless/util.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4767e2c76b01..192d72c8b465 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6508,6 +6508,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5; } +/** + * cfg80211_radio_chandef_valid - Check if the radio supports the chandef + * + * @radio: wiphy radio + * @chandef: chandef for current channel + * + * Return: whether or not the given chandef is valid for the given radio + */ +bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, + const struct cfg80211_chan_def *chandef); + /** * ieee80211_get_response_rate - get basic rate for a given rate * diff --git a/net/wireless/util.c b/net/wireless/util.c index 2492f259621f..9a7c3adc8a3b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2886,3 +2886,38 @@ cfg80211_get_iftype_ext_capa(struct wiphy *wiphy, enum nl80211_iftype type) return NULL; } EXPORT_SYMBOL(cfg80211_get_iftype_ext_capa); + +static bool +ieee80211_radio_freq_range_valid(const struct wiphy_radio *radio, + u32 freq, u32 width) +{ + const struct wiphy_radio_freq_range *r; + int i; + + for (i = 0; i < radio->n_freq_range; i++) { + r = &radio->freq_range[i]; + if (freq - width / 2 >= r->start_freq && + freq + width / 2 <= r->end_freq) + return true; + } + + return false; +} + +bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, + const struct cfg80211_chan_def *chandef) +{ + u32 freq, width; + + freq = ieee80211_chandef_to_khz(chandef); + width = nl80211_chan_width_to_mhz(chandef->width); + if (!ieee80211_radio_freq_range_valid(radio, freq, width)) + return false; + + freq = MHZ_TO_KHZ(chandef->center_freq2); + if (freq && !ieee80211_radio_freq_range_valid(radio, freq, width)) + return false; + + return true; +} +EXPORT_SYMBOL(cfg80211_radio_chandef_valid); -- cgit v1.2.3 From 2920bc8d916d30b5273ec16e6878f13b24e3851f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:34 +0200 Subject: wifi: mac80211: add radio index to ieee80211_chanctx_conf Will be used to explicitly assign a channel context to a wiphy radio. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/59f76f57d935f155099276be22badfa671d5bfd9.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/chan.c | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bd0f8aefa797..e78ccbe38d6d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -250,6 +250,7 @@ struct ieee80211_chan_req { * @min_def: the minimum channel definition currently required. * @ap: the channel definition the AP actually is operating as, * for use with (wider bandwidth) OFDMA + * @radio_idx: index of the wiphy radio used used for this channel * @rx_chains_static: The number of RX chains that must always be * active on the channel to receive MIMO transmissions * @rx_chains_dynamic: The number of RX chains that must be enabled @@ -264,6 +265,7 @@ struct ieee80211_chanctx_conf { struct cfg80211_chan_def min_def; struct cfg80211_chan_def ap; + int radio_idx; u8 rx_chains_static, rx_chains_dynamic; bool radar_enabled; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 06a65dc6f6c6..6c4d02cb07bb 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -656,7 +656,8 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, static struct ieee80211_chanctx * ieee80211_alloc_chanctx(struct ieee80211_local *local, const struct ieee80211_chan_req *chanreq, - enum ieee80211_chanctx_mode mode) + enum ieee80211_chanctx_mode mode, + int radio_idx) { struct ieee80211_chanctx *ctx; @@ -674,6 +675,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, ctx->conf.rx_chains_dynamic = 1; ctx->mode = mode; ctx->conf.radar_enabled = false; + ctx->conf.radio_idx = radio_idx; _ieee80211_recalc_chanctx_min_def(local, ctx, NULL, false); return ctx; @@ -714,7 +716,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); - ctx = ieee80211_alloc_chanctx(local, chanreq, mode); + ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1); if (!ctx) return ERR_PTR(-ENOMEM); @@ -1155,7 +1157,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, !list_empty(&curr_ctx->reserved_links)) return -EBUSY; - new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode); + new_ctx = ieee80211_alloc_chanctx(local, chanreq, mode, -1); if (!new_ctx) return -ENOMEM; -- cgit v1.2.3 From 2cb13dec8c5e5e104fd2f71c2dee761d6ed9a333 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:34 +0200 Subject: cache: add __cacheline_group_{begin, end}_aligned() (+ couple more) __cacheline_group_begin(), unfortunately, doesn't align the group anyhow. If it is wanted, then you need to do something like __cacheline_group_begin(grp) __aligned(ALIGN) which isn't really convenient nor compact. Add the _aligned() counterparts to align the groups automatically to either the specified alignment (optional) or ``SMP_CACHE_BYTES``. Note that the actual struct layout will then be (on x64 with 64-byte CL): struct x { u32 y; // offset 0, size 4, padding 56 __cacheline_group_begin__grp; // offset 64, size 0 u32 z; // offset 64, size 4, padding 4 __cacheline_group_end__grp; // offset 72, size 0 __cacheline_group_pad__grp; // offset 72, size 0, padding 56 u32 w; // offset 128 }; The end marker is aligned to long, so that you can assert the struct size more strictly, but the offset of the next field in the structure will be aligned to the group alignment, so that the next field won't fall into the group it's not intended to. Add __LARGEST_ALIGN definition and LARGEST_ALIGN() macro. __LARGEST_ALIGN is the value to which the compilers align fields when __aligned_largest is specified. Sometimes, it might be needed to get this value outside of variable definitions. LARGEST_ALIGN() is macro which just aligns a value to __LARGEST_ALIGN. Also add SMP_CACHE_ALIGN(), similar to L1_CACHE_ALIGN(), but using ``SMP_CACHE_BYTES`` instead of ``L1_CACHE_BYTES`` as the former also accounts L2, needed in some cases. Signed-off-by: Alexander Lobakin Reviewed-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- include/linux/cache.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/cache.h b/include/linux/cache.h index 0ecb17bb6883..ca2a05682a54 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,6 +13,32 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +/** + * SMP_CACHE_ALIGN - align a value to the L2 cacheline size + * @x: value to align + * + * On some architectures, L2 ("SMP") CL size is bigger than L1, and sometimes, + * this needs to be accounted. + * + * Return: aligned value. + */ +#ifndef SMP_CACHE_ALIGN +#define SMP_CACHE_ALIGN(x) ALIGN(x, SMP_CACHE_BYTES) +#endif + +/* + * ``__aligned_largest`` aligns a field to the value most optimal for the + * target architecture to perform memory operations. Get the actual value + * to be able to use it anywhere else. + */ +#ifndef __LARGEST_ALIGN +#define __LARGEST_ALIGN sizeof(struct { long x; } __aligned_largest) +#endif + +#ifndef LARGEST_ALIGN +#define LARGEST_ALIGN(x) ALIGN(x, __LARGEST_ALIGN) +#endif + /* * __read_mostly is used to keep rarely changing variables out of frequently * updated cachelines. Its use should be reserved for data that is used @@ -95,6 +121,39 @@ __u8 __cacheline_group_end__##GROUP[0] #endif +/** + * __cacheline_group_begin_aligned - declare an aligned group start + * @GROUP: name of the group + * @...: optional group alignment + * + * The following block inside a struct: + * + * __cacheline_group_begin_aligned(grp); + * field a; + * field b; + * __cacheline_group_end_aligned(grp); + * + * will always be aligned to either the specified alignment or + * ``SMP_CACHE_BYTES``. + */ +#define __cacheline_group_begin_aligned(GROUP, ...) \ + __cacheline_group_begin(GROUP) \ + __aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES) + +/** + * __cacheline_group_end_aligned - declare an aligned group end + * @GROUP: name of the group + * @...: optional alignment (same as was in __cacheline_group_begin_aligned()) + * + * Note that the end marker is aligned to sizeof(long) to allow more precise + * size assertion. It also declares a padding at the end to avoid next field + * falling into this cacheline. + */ +#define __cacheline_group_end_aligned(GROUP, ...) \ + __cacheline_group_end(GROUP) __aligned(sizeof(long)); \ + struct { } __cacheline_group_pad__##GROUP \ + __aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES) + #ifndef CACHELINE_ASSERT_GROUP_MEMBER #define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \ BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \ -- cgit v1.2.3 From 39daa09d34ada1bc7227d68def63e0a2105b5496 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:35 +0200 Subject: page_pool: use __cacheline_group_{begin, end}_aligned() Instead of doing __cacheline_group_begin() __aligned(), use the new __cacheline_group_{begin,end}_aligned(), so that it will take care of the group alignment itself. Also replace open-coded `4 * sizeof(long)` in two places with a definition. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/page_pool/types.h | 22 ++++++++++++---------- net/core/page_pool.c | 3 ++- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index b70bcc14ceda..50569fed7868 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -129,6 +129,16 @@ struct page_pool_stats { }; #endif +/* The whole frag API block must stay within one cacheline. On 32-bit systems, + * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``. + * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``. + * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that + * one for simplicity. + * Having it aligned to a cacheline boundary may be excessive and doesn't bring + * any good. + */ +#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) + struct page_pool { struct page_pool_params_fast p; @@ -142,19 +152,11 @@ struct page_pool { bool system:1; /* This is a global percpu pool */ #endif - /* The following block must stay within one cacheline. On 32-bit - * systems, sizeof(long) == sizeof(int), so that the block size is - * ``3 * sizeof(long)``. On 64-bit systems, the actual size is - * ``2 * sizeof(long) + sizeof(int)``. The closest pow-2 to both of - * them is ``4 * sizeof(long)``, so just use that one for simplicity. - * Having it aligned to a cacheline boundary may be excessive and - * doesn't bring any good. - */ - __cacheline_group_begin(frag) __aligned(4 * sizeof(long)); + __cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN); long frag_users; netmem_ref frag_page; unsigned int frag_offset; - __cacheline_group_end(frag); + __cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN); struct delayed_work release_dw; void (*disconnect)(void *pool); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 855271a6cad2..2abe6e919224 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -178,7 +178,8 @@ static void page_pool_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset); - CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long)); + CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, + PAGE_POOL_FRAG_GROUP_ALIGN); } static int page_pool_init(struct page_pool *pool, -- cgit v1.2.3 From 62c884256ea1f898b00f20729a306a4eeb1840b1 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:36 +0200 Subject: libeth: add cacheline / struct layout assertion helpers Add helpers to assert struct field layout, a bit more crazy and networking-specific than in . They assume you have 3 CL-aligned groups (read-mostly, read-write, cold) in a struct you want to assert, and nothing besides them. For 64-bit with 64-byte cachelines, the assertions are as strict as possible, as the size can then be easily predicted. For the rest, make sure they don't cross the specified bound. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/cache.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 include/net/libeth/cache.h (limited to 'include') diff --git a/include/net/libeth/cache.h b/include/net/libeth/cache.h new file mode 100644 index 000000000000..bdb0c043ce61 --- /dev/null +++ b/include/net/libeth/cache.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_CACHE_H +#define __LIBETH_CACHE_H + +#include + +/** + * libeth_cacheline_group_assert - make sure cacheline group size is expected + * @type: type of the structure containing the group + * @grp: group name inside the struct + * @sz: expected group size + */ +#if defined(CONFIG_64BIT) && SMP_CACHE_BYTES == 64 +#define libeth_cacheline_group_assert(type, grp, sz) \ + static_assert(offsetof(type, __cacheline_group_end__##grp) - \ + offsetofend(type, __cacheline_group_begin__##grp) == \ + (sz)) +#define __libeth_cacheline_struct_assert(type, sz) \ + static_assert(sizeof(type) == (sz)) +#else /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */ +#define libeth_cacheline_group_assert(type, grp, sz) \ + static_assert(offsetof(type, __cacheline_group_end__##grp) - \ + offsetofend(type, __cacheline_group_begin__##grp) <= \ + (sz)) +#define __libeth_cacheline_struct_assert(type, sz) \ + static_assert(sizeof(type) <= (sz)) +#endif /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */ + +#define __libeth_cls1(sz1) SMP_CACHE_ALIGN(sz1) +#define __libeth_cls2(sz1, sz2) (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2)) +#define __libeth_cls3(sz1, sz2, sz3) \ + (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2) + SMP_CACHE_ALIGN(sz3)) +#define __libeth_cls(...) \ + CONCATENATE(__libeth_cls, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) + +/** + * libeth_cacheline_struct_assert - make sure CL-based struct size is expected + * @type: type of the struct + * @...: from 1 to 3 CL group sizes (read-mostly, read-write, cold) + * + * When a struct contains several CL groups, it's difficult to predict its size + * on different architectures. The macro instead takes sizes of all of the + * groups the structure contains and generates the final struct size. + */ +#define libeth_cacheline_struct_assert(type, ...) \ + __libeth_cacheline_struct_assert(type, __libeth_cls(__VA_ARGS__)); \ + static_assert(__alignof(type) >= SMP_CACHE_BYTES) + +/** + * libeth_cacheline_set_assert - make sure CL-based struct layout is expected + * @type: type of the struct + * @ro: expected size of the read-mostly group + * @rw: expected size of the read-write group + * @c: expected size of the cold group + * + * Check that each group size is expected and then do final struct size check. + */ +#define libeth_cacheline_set_assert(type, ro, rw, c) \ + libeth_cacheline_group_assert(type, read_mostly, ro); \ + libeth_cacheline_group_assert(type, read_write, rw); \ + libeth_cacheline_group_assert(type, cold, c); \ + libeth_cacheline_struct_assert(type, ro, rw, c) + +#endif /* __LIBETH_CACHE_H */ -- cgit v1.2.3 From 5aaac1aece4e4db9d620791a33f7a4173c660e65 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:45 +0200 Subject: libeth: support different types of buffers for Rx Unlike previous generations, idpf requires more buffer types for optimal performance. This includes: header buffers, short buffers, and no-overhead buffers (w/o headroom and tailroom, for TCP zerocopy when the header split is enabled). Introduce libeth Rx buffer type and calculate page_pool params accordingly. All the HW-related details like buffer alignment are still accounted. For the header buffers, pick 256 bytes as in most places in the kernel (have you ever seen frames with bigger headers?). Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/libeth/rx.c | 132 ++++++++++++++++++++++++++++++--- include/net/libeth/rx.h | 19 +++++ 2 files changed, 140 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c index 6221b88c34ac..d0b158b6e55b 100644 --- a/drivers/net/ethernet/intel/libeth/rx.c +++ b/drivers/net/ethernet/intel/libeth/rx.c @@ -6,7 +6,7 @@ /* Rx buffer management */ /** - * libeth_rx_hw_len - get the actual buffer size to be passed to HW + * libeth_rx_hw_len_mtu - get the actual buffer size to be passed to HW * @pp: &page_pool_params of the netdev to calculate the size for * @max_len: maximum buffer size for a single descriptor * @@ -14,7 +14,7 @@ * MTU the @dev has, HW required alignment, minimum and maximum allowed values, * and system's page size. */ -static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len) +static u32 libeth_rx_hw_len_mtu(const struct page_pool_params *pp, u32 max_len) { u32 len; @@ -26,6 +26,118 @@ static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len) return len; } +/** + * libeth_rx_hw_len_truesize - get the short buffer size to be passed to HW + * @pp: &page_pool_params of the netdev to calculate the size for + * @max_len: maximum buffer size for a single descriptor + * @truesize: desired truesize for the buffers + * + * Return: HW-writeable length per one buffer to pass it to the HW ignoring the + * MTU and closest to the passed truesize. Can be used for "short" buffer + * queues to fragment pages more efficiently. + */ +static u32 libeth_rx_hw_len_truesize(const struct page_pool_params *pp, + u32 max_len, u32 truesize) +{ + u32 min, len; + + min = SKB_HEAD_ALIGN(pp->offset + LIBETH_RX_BUF_STRIDE); + truesize = clamp(roundup_pow_of_two(truesize), roundup_pow_of_two(min), + PAGE_SIZE << LIBETH_RX_PAGE_ORDER); + + len = SKB_WITH_OVERHEAD(truesize - pp->offset); + len = ALIGN_DOWN(len, LIBETH_RX_BUF_STRIDE) ? : LIBETH_RX_BUF_STRIDE; + len = min3(len, ALIGN_DOWN(max_len ? : U32_MAX, LIBETH_RX_BUF_STRIDE), + pp->max_len); + + return len; +} + +/** + * libeth_rx_page_pool_params - calculate params with the stack overhead + * @fq: buffer queue to calculate the size for + * @pp: &page_pool_params of the netdev + * + * Set the PP params to will all needed stack overhead (headroom, tailroom) and + * both the HW buffer length and the truesize for all types of buffers. For + * "short" buffers, truesize never exceeds the "wanted" one; for the rest, + * it can be up to the page size. + * + * Return: true on success, false on invalid input params. + */ +static bool libeth_rx_page_pool_params(struct libeth_fq *fq, + struct page_pool_params *pp) +{ + pp->offset = LIBETH_SKB_HEADROOM; + /* HW-writeable / syncable length per one page */ + pp->max_len = LIBETH_RX_PAGE_LEN(pp->offset); + + /* HW-writeable length per buffer */ + switch (fq->type) { + case LIBETH_FQE_MTU: + fq->buf_len = libeth_rx_hw_len_mtu(pp, fq->buf_len); + break; + case LIBETH_FQE_SHORT: + fq->buf_len = libeth_rx_hw_len_truesize(pp, fq->buf_len, + fq->truesize); + break; + case LIBETH_FQE_HDR: + fq->buf_len = ALIGN(LIBETH_MAX_HEAD, LIBETH_RX_BUF_STRIDE); + break; + default: + return false; + } + + /* Buffer size to allocate */ + fq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp->offset + + fq->buf_len)); + + return true; +} + +/** + * libeth_rx_page_pool_params_zc - calculate params without the stack overhead + * @fq: buffer queue to calculate the size for + * @pp: &page_pool_params of the netdev + * + * Set the PP params to exclude the stack overhead and both the buffer length + * and the truesize, which are equal for the data buffers. Note that this + * requires separate header buffers to be always active and account the + * overhead. + * With the MTU == ``PAGE_SIZE``, this allows the kernel to enable the zerocopy + * mode. + * + * Return: true on success, false on invalid input params. + */ +static bool libeth_rx_page_pool_params_zc(struct libeth_fq *fq, + struct page_pool_params *pp) +{ + u32 mtu, max; + + pp->offset = 0; + pp->max_len = PAGE_SIZE << LIBETH_RX_PAGE_ORDER; + + switch (fq->type) { + case LIBETH_FQE_MTU: + mtu = READ_ONCE(pp->netdev->mtu); + break; + case LIBETH_FQE_SHORT: + mtu = fq->truesize; + break; + default: + return false; + } + + mtu = roundup_pow_of_two(mtu); + max = min(rounddown_pow_of_two(fq->buf_len ? : U32_MAX), + pp->max_len); + + fq->buf_len = clamp(mtu, LIBETH_RX_BUF_STRIDE, max); + fq->truesize = fq->buf_len; + + return true; +} + /** * libeth_rx_fq_create - create a PP with the default libeth settings * @fq: buffer queue struct to fill @@ -44,19 +156,17 @@ int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi) .netdev = napi->dev, .napi = napi, .dma_dir = DMA_FROM_DEVICE, - .offset = LIBETH_SKB_HEADROOM, }; struct libeth_fqe *fqes; struct page_pool *pool; + bool ret; - /* HW-writeable / syncable length per one page */ - pp.max_len = LIBETH_RX_PAGE_LEN(pp.offset); - - /* HW-writeable length per buffer */ - fq->buf_len = libeth_rx_hw_len(&pp, fq->buf_len); - /* Buffer size to allocate */ - fq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp.offset + - fq->buf_len)); + if (!fq->hsplit) + ret = libeth_rx_page_pool_params(fq, &pp); + else + ret = libeth_rx_page_pool_params_zc(fq, &pp); + if (!ret) + return -EINVAL; pool = page_pool_create(&pp); if (IS_ERR(pool)) diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h index f29ea3e34c6c..43574bd6612f 100644 --- a/include/net/libeth/rx.h +++ b/include/net/libeth/rx.h @@ -17,6 +17,8 @@ #define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM /* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ #define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) +/* Maximum supported L2-L4 header length */ +#define LIBETH_MAX_HEAD roundup_pow_of_two(max(MAX_HEADER, 256)) /* Always use order-0 pages */ #define LIBETH_RX_PAGE_ORDER 0 @@ -43,6 +45,18 @@ struct libeth_fqe { u32 truesize; } __aligned_largest; +/** + * enum libeth_fqe_type - enum representing types of Rx buffers + * @LIBETH_FQE_MTU: buffer size is determined by MTU + * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames + * @LIBETH_FQE_HDR: buffer size is ```LIBETH_MAX_HEAD```-sized, for headers + */ +enum libeth_fqe_type { + LIBETH_FQE_MTU = 0U, + LIBETH_FQE_SHORT, + LIBETH_FQE_HDR, +}; + /** * struct libeth_fq - structure representing a buffer (fill) queue * @fp: hotpath part of the structure @@ -50,6 +64,8 @@ struct libeth_fqe { * @fqes: array of Rx buffers * @truesize: size to allocate per buffer, w/overhead * @count: number of descriptors/buffers the queue has + * @type: type of the buffers this queue has + * @hsplit: flag whether header split is enabled * @buf_len: HW-writeable length per each buffer * @nid: ID of the closest NUMA node with memory */ @@ -63,6 +79,9 @@ struct libeth_fq { ); /* Cold fields */ + enum libeth_fqe_type type:2; + bool hsplit:1; + u32 buf_len; int nid; }; -- cgit v1.2.3 From a808878308a8041ae10a151d69e2d22f94cae9f4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 14 Apr 2024 11:05:25 +0300 Subject: driver core: auxiliary bus: show auxiliary device IRQs PCI subfunctions (SF) are anchored on the auxiliary bus. PCI physical and virtual functions are anchored on the PCI bus. The irq information of each such function is visible to users via sysfs directory "msi_irqs" containing files for each irq entry. However, for PCI SFs such information is unavailable. Due to this users have no visibility on IRQs used by the SFs. Secondly, an SF can be multi function device supporting rdma, netdevice and more. Without irq information at the bus level, the user is unable to view or use the affinity of the SF IRQs. Hence to match to the equivalent PCI PFs and VFs, add "irqs" directory, for supporting auxiliary devices, containing file for each irq entry. For example: $ ls /sys/bus/auxiliary/devices/mlx5_core.sf.1/irqs/ 50 51 52 53 54 55 56 57 58 Cc: Simon Horman Reviewed-by: Przemek Kitszel Reviewed-by: Parav Pandit Reviewed-by: Greg Kroah-Hartman Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- v9-v10: - remove Przemek RB - add name field to auxiliary_irq_info (Greg and Przemek) - handle bogus IRQ in auxiliary_device_sysfs_irq_remove (Greg) v8-v9: - add Przemek RB - use guard() in auxiliary_irq_dir_prepare (Paolo) v7-v8: - use cleanup.h for info and name fields (Greg) - correct error flow in auxiliary_irq_dir_prepare (Przemek) - add documentation for new fields of auxiliary_device (Simon) v6-v7: - dynamically creating irqs directory when first irq file created (Greg) - removed irqs flag and simplified the dev_add() API (Greg) - move sysfs related new code to a new auxiliary_sysfs.c file (Greg) v5-v6: - removed concept of shared and exclusive and hence global xarray (Greg) v4-v5: - restore global mutex and replace refcount_t with simple integer (Greg) v3->4: - remove global mutex (Przemek) v2->v3: - fix function declaration in case SYSFS isn't defined v1->v2: - move #ifdefs from drivers/base/auxiliary.c to include/linux/auxiliary_bus.h (Greg) - use EXPORT_SYMBOL_GPL instead of EXPORT_SYMBOL (Greg) - Fix kzalloc(ref) to kzalloc(*ref) (Simon) - Add return description in auxiliary_device_sysfs_irq_add() kdoc (Simon) - Fix auxiliary_irq_mode_show doc (kernel test boot) --- Documentation/ABI/testing/sysfs-bus-auxiliary | 9 ++ drivers/base/Makefile | 1 + drivers/base/auxiliary.c | 1 + drivers/base/auxiliary_sysfs.c | 113 ++++++++++++++++++++++++++ include/linux/auxiliary_bus.h | 24 ++++++ 5 files changed, 148 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-auxiliary create mode 100644 drivers/base/auxiliary_sysfs.c (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-bus-auxiliary b/Documentation/ABI/testing/sysfs-bus-auxiliary new file mode 100644 index 000000000000..cc856079690f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-auxiliary @@ -0,0 +1,9 @@ +What: /sys/bus/auxiliary/devices/.../irqs/ +Date: April, 2024 +Contact: Shay Drory +Description: + The /sys/devices/.../irqs directory contains a variable set of + files, with each file is named as irq number similar to PCI PF + or VF's irq number located in msi_irqs directory. + These irq files are added and removed dynamically when an IRQ + is requested and freed respectively for the PCI SF. diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 3079bfe53d04..7fb21768ca36 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o ifeq ($(CONFIG_SYSFS),y) obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_AUXILIARY_BUS) += auxiliary_sysfs.o endif obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o obj-$(CONFIG_REGMAP) += regmap/ diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index d3a2c40c2f12..3f01f4ec69e5 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -287,6 +287,7 @@ int auxiliary_device_init(struct auxiliary_device *auxdev) dev->bus = &auxiliary_bus_type; device_initialize(&auxdev->dev); + mutex_init(&auxdev->sysfs.lock); return 0; } EXPORT_SYMBOL_GPL(auxiliary_device_init); diff --git a/drivers/base/auxiliary_sysfs.c b/drivers/base/auxiliary_sysfs.c new file mode 100644 index 000000000000..754f21730afd --- /dev/null +++ b/drivers/base/auxiliary_sysfs.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ + +#include +#include + +#define AUXILIARY_MAX_IRQ_NAME 11 + +struct auxiliary_irq_info { + struct device_attribute sysfs_attr; + char name[AUXILIARY_MAX_IRQ_NAME]; +}; + +static struct attribute *auxiliary_irq_attrs[] = { + NULL +}; + +static const struct attribute_group auxiliary_irqs_group = { + .name = "irqs", + .attrs = auxiliary_irq_attrs, +}; + +static int auxiliary_irq_dir_prepare(struct auxiliary_device *auxdev) +{ + int ret = 0; + + guard(mutex)(&auxdev->sysfs.lock); + if (auxdev->sysfs.irq_dir_exists) + return 0; + + ret = devm_device_add_group(&auxdev->dev, &auxiliary_irqs_group); + if (ret) + return ret; + + auxdev->sysfs.irq_dir_exists = true; + xa_init(&auxdev->sysfs.irqs); + return 0; +} + +/** + * auxiliary_device_sysfs_irq_add - add a sysfs entry for the given IRQ + * @auxdev: auxiliary bus device to add the sysfs entry. + * @irq: The associated interrupt number. + * + * This function should be called after auxiliary device have successfully + * received the irq. + * The driver is responsible to add a unique irq for the auxiliary device. The + * driver can invoke this function from multiple thread context safely for + * unique irqs of the auxiliary devices. The driver must not invoke this API + * multiple times if the irq is already added previously. + * + * Return: zero on success or an error code on failure. + */ +int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq) +{ + struct auxiliary_irq_info *info __free(kfree) = NULL; + struct device *dev = &auxdev->dev; + int ret; + + ret = auxiliary_irq_dir_prepare(auxdev); + if (ret) + return ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + sysfs_attr_init(&info->sysfs_attr.attr); + snprintf(info->name, AUXILIARY_MAX_IRQ_NAME, "%d", irq); + + ret = xa_insert(&auxdev->sysfs.irqs, irq, info, GFP_KERNEL); + if (ret) + return ret; + + info->sysfs_attr.attr.name = info->name; + ret = sysfs_add_file_to_group(&dev->kobj, &info->sysfs_attr.attr, + auxiliary_irqs_group.name); + if (ret) + goto sysfs_add_err; + + xa_store(&auxdev->sysfs.irqs, irq, no_free_ptr(info), GFP_KERNEL); + return 0; + +sysfs_add_err: + xa_erase(&auxdev->sysfs.irqs, irq); + return ret; +} +EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_add); + +/** + * auxiliary_device_sysfs_irq_remove - remove a sysfs entry for the given IRQ + * @auxdev: auxiliary bus device to add the sysfs entry. + * @irq: the IRQ to remove. + * + * This function should be called to remove an IRQ sysfs entry. + * The driver must invoke this API when IRQ is released by the device. + */ +void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) +{ + struct auxiliary_irq_info *info __free(kfree) = xa_load(&auxdev->sysfs.irqs, irq); + struct device *dev = &auxdev->dev; + + if (!info) { + dev_err(&auxdev->dev, "IRQ %d doesn't exist\n", irq); + return; + } + sysfs_remove_file_from_group(&dev->kobj, &info->sysfs_attr.attr, + auxiliary_irqs_group.name); + xa_erase(&auxdev->sysfs.irqs, irq); +} +EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_remove); diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index de21d9d24a95..3ba4487c9cd9 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -58,6 +58,9 @@ * in * @name: Match name found by the auxiliary device driver, * @id: unique identitier if multiple devices of the same name are exported, + * @irqs: irqs xarray contains irq indices which are used by the device, + * @lock: Synchronize irq sysfs creation, + * @irq_dir_exists: whether "irqs" directory exists, * * An auxiliary_device represents a part of its parent device's functionality. * It is given a name that, combined with the registering drivers @@ -139,6 +142,11 @@ struct auxiliary_device { struct device dev; const char *name; u32 id; + struct { + struct xarray irqs; + struct mutex lock; /* Synchronize irq sysfs creation */ + bool irq_dir_exists; + } sysfs; }; /** @@ -212,8 +220,24 @@ int auxiliary_device_init(struct auxiliary_device *auxdev); int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname); #define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME) +#ifdef CONFIG_SYSFS +int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq); +void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, + int irq); +#else /* CONFIG_SYSFS */ +static inline int +auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq) +{ + return 0; +} + +static inline void +auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) {} +#endif + static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev) { + mutex_destroy(&auxdev->sysfs.lock); put_device(&auxdev->dev); } -- cgit v1.2.3 From 8341eee81c794db0d8dd503c2b0ea2f55eba7334 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Wed, 10 Jul 2024 19:10:04 +0200 Subject: net: psample: fix flag being set in wrong skb A typo makes PSAMPLE_ATTR_SAMPLE_RATE netlink flag be added to the wrong sk_buff. Fix the error and make the input sk_buff pointer "const" so that it doesn't happen again. Acked-by: Eelco Chaudron Fixes: 7b1b2b60c63f ("net: psample: allow using rate as probability") Signed-off-by: Adrian Moreno Reviewed-by: Ido Schimmel Reviewed-by: Antoine Tenart Link: https://patch.msgid.link/20240710171004.2164034-1-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 8 +++++--- net/psample/psample.c | 7 ++++--- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/psample.h b/include/net/psample.h index c52e9ebd88dd..5071b5fc2b59 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -38,13 +38,15 @@ struct sk_buff; #if IS_ENABLED(CONFIG_PSAMPLE) -void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, - u32 sample_rate, const struct psample_metadata *md); +void psample_sample_packet(struct psample_group *group, + const struct sk_buff *skb, u32 sample_rate, + const struct psample_metadata *md); #else static inline void psample_sample_packet(struct psample_group *group, - struct sk_buff *skb, u32 sample_rate, + const struct sk_buff *skb, + u32 sample_rate, const struct psample_metadata *md) { } diff --git a/net/psample/psample.c b/net/psample/psample.c index f48b5b9cd409..a0ddae8a65f9 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -360,8 +360,9 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) } #endif -void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, - u32 sample_rate, const struct psample_metadata *md) +void psample_sample_packet(struct psample_group *group, + const struct sk_buff *skb, u32 sample_rate, + const struct psample_metadata *md) { ktime_t tstamp = ktime_get_real(); int out_ifindex = md->out_ifindex; @@ -498,7 +499,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, goto error; if (md->rate_as_probability) - nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); + nla_put_flag(nl_skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY); genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, -- cgit v1.2.3 From 13cabc47f8ae69d24653f32c28399d493fde0a56 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 10 Jul 2024 04:30:28 -0700 Subject: netdevice: define and allocate &net_device _properly_ In fact, this structure contains a flexible array at the end, but historically its size, alignment etc., is calculated manually. There are several instances of the structure embedded into other structures, but also there's ongoing effort to remove them and we could in the meantime declare &net_device properly. Declare the array explicitly, use struct_size() and store the array size inside the structure, so that __counted_by() can be applied. Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the allocated buffer is aligned to what the user expects. Also, change its alignment from %NETDEV_ALIGN to the cacheline size as per several suggestions on the netdev ML. bloat-o-meter for vmlinux: free_netdev 445 440 -5 netdev_freemem 24 - -24 alloc_netdev_mqs 1481 1450 -31 On x86_64 with several NICs of different vendors, I was never able to get a &net_device pointer not aligned to the cacheline size after the change. Signed-off-by: Alexander Lobakin Signed-off-by: Breno Leitao Reviewed-by: Przemek Kitszel Reviewed-by: Eric Dumazet Reviewed-by: Kees Cook Link: https://patch.msgid.link/20240710113036.2125584-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 15 +++++++++------ net/core/dev.c | 30 ++++++------------------------ net/core/net-sysfs.c | 2 +- 3 files changed, 16 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93558645c6d0..607009150b5f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1819,7 +1819,8 @@ enum netdev_reg_state { * @priv_flags: Like 'flags' but invisible to userspace, * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) - * @padded: How much padding added by alloc_netdev() + * @priv_len: Size of the ->priv flexible array + * @priv: Flexible array containing private data * @operstate: RFC2863 operstate * @link_mode: Mapping policy to operstate * @if_port: Selectable AUI, TP, ... @@ -2199,10 +2200,10 @@ struct net_device { unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; - unsigned short padded; + int irq; + u32 priv_len; spinlock_t addr_list_lock; - int irq; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; @@ -2406,7 +2407,10 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; -}; + + u8 priv[] ____cacheline_aligned + __counted_by(priv_len); +} ____cacheline_aligned; #define to_net_dev(d) container_of(d, struct net_device, dev) /* @@ -2596,7 +2600,7 @@ void dev_net_set(struct net_device *dev, struct net *net) */ static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); + return (void *)dev->priv; } /* Set the sysfs physical device reference for the network logical device @@ -3127,7 +3131,6 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); -void netdev_freemem(struct net_device *dev); void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index 73e5af6943c3..6ea1d20676fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11006,13 +11006,6 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev) } EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); -void netdev_freemem(struct net_device *dev) -{ - char *addr = (char *)dev - dev->padded; - - kvfree(addr); -} - /** * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for @@ -11032,8 +11025,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; - unsigned int alloc_size; - struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -11047,21 +11038,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, return NULL; } - alloc_size = sizeof(struct net_device); - if (sizeof_priv) { - /* ensure 32-byte alignment of private area */ - alloc_size = ALIGN(alloc_size, NETDEV_ALIGN); - alloc_size += sizeof_priv; - } - /* ensure 32-byte alignment of whole construct */ - alloc_size += NETDEV_ALIGN - 1; - - p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); - if (!p) + dev = kvzalloc(struct_size(dev, priv, sizeof_priv), + GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); + if (!dev) return NULL; - dev = PTR_ALIGN(p, NETDEV_ALIGN); - dev->padded = (char *)dev - (char *)p; + dev->priv_len = sizeof_priv; ref_tracker_dir_init(&dev->refcnt_tracker, 128, name); #ifdef CONFIG_PCPU_DEV_REFCNT @@ -11148,7 +11130,7 @@ free_pcpu: free_percpu(dev->pcpu_refcnt); free_dev: #endif - netdev_freemem(dev); + kvfree(dev); return NULL; } EXPORT_SYMBOL(alloc_netdev_mqs); @@ -11203,7 +11185,7 @@ void free_netdev(struct net_device *dev) /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED || dev->reg_state == NETREG_DUMMY) { - netdev_freemem(dev); + kvfree(dev); return; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4c27a360c294..0e2084ce7b75 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -2028,7 +2028,7 @@ static void netdev_release(struct device *d) * device is dead and about to be freed. */ kfree(rcu_access_pointer(dev->ifalias)); - netdev_freemem(dev); + kvfree(dev); } static const void *net_namespace(const struct device *d) -- cgit v1.2.3 From f7866c35873377313ff94398f17d425b28b71de1 Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Thu, 11 Jul 2024 22:58:18 +0800 Subject: bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT When loading a EXT program without specifying `attr->attach_prog_fd`, the `prog->aux->dst_prog` will be null. At this time, calling resolve_prog_type() anywhere will result in a null pointer dereference. Example stack trace: [ 8.107863] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 [ 8.108262] Mem abort info: [ 8.108384] ESR = 0x0000000096000004 [ 8.108547] EC = 0x25: DABT (current EL), IL = 32 bits [ 8.108722] SET = 0, FnV = 0 [ 8.108827] EA = 0, S1PTW = 0 [ 8.108939] FSC = 0x04: level 0 translation fault [ 8.109102] Data abort info: [ 8.109203] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 8.109399] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 8.109614] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 8.109836] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101354000 [ 8.110011] [0000000000000004] pgd=0000000000000000, p4d=0000000000000000 [ 8.112624] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 8.112783] Modules linked in: [ 8.113120] CPU: 0 PID: 99 Comm: may_access_dire Not tainted 6.10.0-rc3-next-20240613-dirty #1 [ 8.113230] Hardware name: linux,dummy-virt (DT) [ 8.113390] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 8.113429] pc : may_access_direct_pkt_data+0x24/0xa0 [ 8.113746] lr : add_subprog_and_kfunc+0x634/0x8e8 [ 8.113798] sp : ffff80008283b9f0 [ 8.113813] x29: ffff80008283b9f0 x28: ffff800082795048 x27: 0000000000000001 [ 8.113881] x26: ffff0000c0bb2600 x25: 0000000000000000 x24: 0000000000000000 [ 8.113897] x23: ffff0000c1134000 x22: 000000000001864f x21: ffff0000c1138000 [ 8.113912] x20: 0000000000000001 x19: ffff0000c12b8000 x18: ffffffffffffffff [ 8.113929] x17: 0000000000000000 x16: 0000000000000000 x15: 0720072007200720 [ 8.113944] x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720 [ 8.113958] x11: 0720072007200720 x10: 0000000000f9fca4 x9 : ffff80008021f4e4 [ 8.113991] x8 : 0101010101010101 x7 : 746f72705f6d656d x6 : 000000001e0e0f5f [ 8.114006] x5 : 000000000001864f x4 : ffff0000c12b8000 x3 : 000000000000001c [ 8.114020] x2 : 0000000000000002 x1 : 0000000000000000 x0 : 0000000000000000 [ 8.114126] Call trace: [ 8.114159] may_access_direct_pkt_data+0x24/0xa0 [ 8.114202] bpf_check+0x3bc/0x28c0 [ 8.114214] bpf_prog_load+0x658/0xa58 [ 8.114227] __sys_bpf+0xc50/0x2250 [ 8.114240] __arm64_sys_bpf+0x28/0x40 [ 8.114254] invoke_syscall.constprop.0+0x54/0xf0 [ 8.114273] do_el0_svc+0x4c/0xd8 [ 8.114289] el0_svc+0x3c/0x140 [ 8.114305] el0t_64_sync_handler+0x134/0x150 [ 8.114331] el0t_64_sync+0x168/0x170 [ 8.114477] Code: 7100707f 54000081 f9401c00 f9403800 (b9400403) [ 8.118672] ---[ end trace 0000000000000000 ]--- One way to fix it is by forcing `attach_prog_fd` non-empty when bpf_prog_load(). But this will lead to `libbpf_probe_bpf_prog_type` API broken which use verifier log to probe prog type and will log nothing if we reject invalid EXT prog before bpf_check(). Another way is by adding null check in resolve_prog_type(). The issue was introduced by commit 4a9c7bbe2ed4 ("bpf: Resolve to prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT") which wanted to correct type resolution for BPF_PROG_TYPE_TRACING programs. Before that, the type resolution of BPF_PROG_TYPE_EXT prog actually follows the logic below: prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; It implies that when EXT program is not yet attached to `dst_prog`, the prog type should be EXT itself. This code worked fine in the past. So just keep using it. Fix this by returning `prog->type` for BPF_PROG_TYPE_EXT if `dst_prog` is not present in resolve_prog_type(). Fixes: 4a9c7bbe2ed4 ("bpf: Resolve to prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT") Signed-off-by: Tengda Wu Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Cc: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20240711145819.254178-2-wutengda@huaweicloud.com --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e98ba5a5cf79..6503c85b10a3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -856,7 +856,7 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return prog->type == BPF_PROG_TYPE_EXT ? + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? prog->aux->dst_prog->type : prog->type; } -- cgit v1.2.3 From d69ba6bbaf1f606ac354e925571a54d025e32aae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jul 2024 15:07:03 -0700 Subject: net: ethtool: let drivers remove lost RSS contexts RSS contexts may get lost from a device, in various extreme circumstances. Specifically if the firmware leaks resources and resets, or crashes and either recovers in partially working state or the crash causes a different FW version to run - creating the context again may fail. Drivers should do their absolute best to prevent this from happening. When it does, however, telling user that a context exists, when it can't possibly be used any more is counter productive. Add a helper for drivers to discard contexts. Print an error, in the future netlink notification will also be sent. More robust approaches were proposed, like keeping the contexts but marking them as "dead" (but possibly resurrected by next reset). That may be better but it's unclear at this stage whether the effort is worth the benefits. Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20240711220713.283778-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ net/ethtool/common.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e213b5508da6..89da0254ccd4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -210,6 +210,8 @@ static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, return struct_size_t(struct ethtool_rxfh_context, data, flex_len); } +void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 7bda9600efcf..67d06cd002a5 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -741,3 +741,17 @@ ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) } } EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); + +void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id) +{ + struct ethtool_rxfh_context *ctx; + + WARN_ONCE(!rtnl_is_locked() && + !lockdep_is_held_type(&dev->ethtool->rss_lock, -1), + "RSS context lock assertion failed\n"); + + netdev_err(dev, "device error, RSS context %d lost\n", context_id); + ctx = xa_erase(&dev->ethtool->rss_ctx, context_id); + kfree(ctx); +} +EXPORT_SYMBOL(ethtool_rxfh_context_lost); -- cgit v1.2.3 From 28c8757a792bbbc76407777bd0303862daa75057 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jul 2024 15:07:04 -0700 Subject: net: ethtool: let drivers declare max size of RSS indir table and key Some drivers (bnxt but I think also mlx5 from ML discussions) change the size of the indirection table depending on the number of Rx rings. Decouple the max table size from the size of the currently used table, so that we can reserve space in the context for table growth. Static members in ethtool_ops are good enough for now, we can add callbacks to read the max size more dynamically if someone needs that. Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20240711220713.283778-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 20 ++++++++------------ net/ethtool/ioctl.c | 46 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 89da0254ccd4..a1ee76936f53 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -181,6 +181,7 @@ struct ethtool_rxfh_context { /* private: driver private data, indirection table, and hash key are * stored sequentially in @data area. Use below helpers to access. */ + u32 key_off; u8 data[] __aligned(sizeof(void *)); }; @@ -196,18 +197,7 @@ static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) { - return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); -} - -static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, - u16 priv_size) -{ - size_t indir_bytes = array_size(indir_size, sizeof(u32)); - size_t flex_len; - - flex_len = size_add(size_add(indir_bytes, key_size), - ALIGN(priv_size, sizeof(u32))); - return struct_size_t(struct ethtool_rxfh_context, data, flex_len); + return &ctx->data[ctx->key_off]; } void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); @@ -723,6 +713,10 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_indir_space: max size of RSS indirection tables, if indirection table + * size as returned by @get_rxfh_indir_size may change during lifetime + * of the device. Leave as 0 if the table size is constant. + * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this @@ -940,6 +934,8 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u32 rxfh_indir_space; + u16 rxfh_key_space; u16 rxfh_priv_size; u32 rxfh_max_context_id; u32 supported_coalesce_params; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 615812ff8974..0732710a4836 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1290,6 +1290,40 @@ out: return ret; } +static struct ethtool_rxfh_context * +ethtool_rxfh_ctx_alloc(const struct ethtool_ops *ops, + u32 indir_size, u32 key_size) +{ + size_t indir_bytes, flex_len, key_off, size; + struct ethtool_rxfh_context *ctx; + u32 priv_bytes, indir_max; + u16 key_max; + + key_max = max(key_size, ops->rxfh_key_space); + indir_max = max(indir_size, ops->rxfh_indir_space); + + priv_bytes = ALIGN(ops->rxfh_priv_size, sizeof(u32)); + indir_bytes = array_size(indir_max, sizeof(u32)); + + key_off = size_add(priv_bytes, indir_bytes); + flex_len = size_add(key_off, key_max); + size = struct_size_t(struct ethtool_rxfh_context, data, flex_len); + + ctx = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!ctx) + return NULL; + + ctx->indir_size = indir_size; + ctx->key_size = key_size; + ctx->key_off = key_off; + ctx->priv_size = ops->rxfh_priv_size; + + ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; + ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + + return ctx; +} + static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, void __user *useraddr) { @@ -1406,20 +1440,12 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, ret = -EINVAL; goto out; } - ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, - dev_key_size, - ops->rxfh_priv_size), - GFP_KERNEL_ACCOUNT); + ctx = ethtool_rxfh_ctx_alloc(ops, dev_indir_size, dev_key_size); if (!ctx) { ret = -ENOMEM; goto out; } - ctx->indir_size = dev_indir_size; - ctx->key_size = dev_key_size; - ctx->priv_size = ops->rxfh_priv_size; - /* Initialise to an empty context */ - ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; - ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + if (ops->create_rxfh_context) { u32 limit = ops->rxfh_max_context_id ?: U32_MAX; u32 ctx_id; -- cgit v1.2.3 From 63c6e08eac8e7e2ad2e883c968e58e23e1dc8c70 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 11 Jul 2024 17:33:07 -0700 Subject: net/mlx5: IFC updates for SF max IO EQs Expose a new cap sf_eq_usage. The vhca_resource_manager can write this cap, indicating the SF driver should use max_num_eqs_24b to determine how many EQs to use. Will be used in the next patch, to indicate to the SF driver from the PF that the user has set the max io eqs via devlink. So the SF driver can later query the proper max eq value from the new cap. devlink port function set pci/0000:08:00.0/32768 max_io_eqs 32 Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://patch.msgid.link/20240712003310.355106-2-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fdad0071d599..360d42f041b0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1994,7 +1994,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 migration_tracking_state[0x1]; u8 reserved_at_ca[0x6]; u8 migration_in_chunks[0x1]; - u8 reserved_at_d1[0xf]; + u8 reserved_at_d1[0x1]; + u8 sf_eq_usage[0x1]; + u8 reserved_at_d3[0xd]; u8 cross_vhca_object_to_object_supported[0x20]; -- cgit v1.2.3 From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 10 Jul 2024 10:14:28 +0200 Subject: ipv6: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable@vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/net/ip6_route.h | 20 ++++++++++++++------ net/ipv6/ip6_output.c | 1 + net/ipv6/route.c | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { + rcu_read_lock(); + + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + else + err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..784424ac4147 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d72ca0b086d..c9a9506b714d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5689,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } -- cgit v1.2.3 From 0ece498c27d8cd2fdad6f49a6abc34b8badd8fbc Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 10 May 2024 10:36:45 -0400 Subject: Bluetooth: MGMT: Make MGMT_OP_LOAD_CONN_PARAM update existing connection This makes MGMT_OP_LOAD_CONN_PARAM update existing connection by dectecting the request is just for one connection, parameters already exists and there is a connection. Since this is a new behavior the revision is also updated to enable userspace to detect it. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 3 +++ net/bluetooth/hci_sync.c | 18 +++++++++++++++ net/bluetooth/mgmt.c | 50 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 69 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 534c3386e714..20168732f20e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -138,6 +138,7 @@ int hci_suspend_sync(struct hci_dev *hdev); int hci_resume_sync(struct hci_dev *hdev); struct hci_conn; +struct hci_conn_params; int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); @@ -156,3 +157,5 @@ int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, + struct hci_conn_params *params); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index eea34e6a236f..82db6092965b 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6724,3 +6724,21 @@ int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) return -ENOENT; } + +int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, + struct hci_conn_params *params) +{ + struct hci_cp_le_conn_update cp; + + memset(&cp, 0, sizeof(cp)); + cp.handle = cpu_to_le16(conn->handle); + cp.conn_interval_min = cpu_to_le16(params->conn_min_interval); + cp.conn_interval_max = cpu_to_le16(params->conn_max_interval); + cp.conn_latency = cpu_to_le16(params->conn_latency); + cp.supervision_timeout = cpu_to_le16(params->supervision_timeout); + cp.min_ce_len = cpu_to_le16(0x0000); + cp.max_ce_len = cpu_to_le16(0x0000); + + return __hci_cmd_sync_status(hdev, HCI_OP_LE_CONN_UPDATE, + sizeof(cp), &cp, HCI_CMD_TIMEOUT); +} diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 80f220b7e19d..20eca8a9c681 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -42,7 +42,7 @@ #include "aosp.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 22 +#define MGMT_REVISION 23 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -7813,6 +7813,18 @@ unlock: return err; } +static int conn_update_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn_params *params = data; + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_le(hdev, ¶ms->addr, params->addr_type); + if (!conn) + return -ECANCELED; + + return hci_le_conn_update_sync(hdev, conn, params); +} + static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -7846,13 +7858,15 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, hci_dev_lock(hdev); - hci_conn_params_clear_disabled(hdev); + if (param_count > 1) + hci_conn_params_clear_disabled(hdev); for (i = 0; i < param_count; i++) { struct mgmt_conn_param *param = &cp->params[i]; struct hci_conn_params *hci_param; u16 min, max, latency, timeout; u8 addr_type; + bool update; bt_dev_dbg(hdev, "Adding %pMR (type %u)", ¶m->addr.bdaddr, param->addr.type); @@ -7879,6 +7893,19 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, continue; } + /* Detect when the loading is for an existing parameter then + * attempt to trigger the connection update procedure. + */ + if (!i && param_count == 1) { + hci_param = hci_conn_params_lookup(hdev, + ¶m->addr.bdaddr, + addr_type); + if (hci_param) + update = true; + else + hci_conn_params_clear_disabled(hdev); + } + hci_param = hci_conn_params_add(hdev, ¶m->addr.bdaddr, addr_type); if (!hci_param) { @@ -7890,6 +7917,25 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, hci_param->conn_max_interval = max; hci_param->conn_latency = latency; hci_param->supervision_timeout = timeout; + + /* Check if we need to trigger a connection update */ + if (update) { + struct hci_conn *conn; + + /* Lookup for existing connection as central and check + * if parameters match and if they don't then trigger + * a connection update. + */ + conn = hci_conn_hash_lookup_le(hdev, &hci_param->addr, + addr_type); + if (conn && conn->role == HCI_ROLE_MASTER && + (conn->le_conn_min_interval != min || + conn->le_conn_max_interval != max || + conn->le_conn_latency != latency || + conn->le_supv_timeout != timeout)) + hci_cmd_sync_queue(hdev, conn_update_sync, + hci_param, NULL); + } } hci_dev_unlock(hdev); -- cgit v1.2.3 From 8f7dfe171c576aaec4911cc59feaed26d79c7c7f Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Sat, 18 May 2024 10:30:38 +0200 Subject: Bluetooth: hci_core: Prefer struct_size over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. As the "dl" variable is a pointer to "struct hci_dev_list_req" and this structure ends in a flexible array: struct hci_dev_list_req { [...] struct hci_dev_req dev_req[]; /* hci_dev_req structures */ }; the preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the calculation "size + count * size" in the kzalloc() and copy_to_user() functions. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the logic needs a little refactoring to ensure that the "dev_num" member is initialized before the first access to the flex array. Specifically, add the assignment before the list_for_each_entry() loop. Also remove the "size" variable as it is no longer needed. This way, the code is more readable and safer. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sock.h | 2 +- net/bluetooth/hci_core.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 9949870f7d78..13e8cd4414a1 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -144,7 +144,7 @@ struct hci_dev_req { struct hci_dev_list_req { __u16 dev_num; - struct hci_dev_req dev_req[]; /* hci_dev_req structures */ + struct hci_dev_req dev_req[] __counted_by(dev_num); }; struct hci_conn_list_req { diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c644b30977bd..a2cad8a982f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -801,7 +801,7 @@ int hci_get_dev_list(void __user *arg) struct hci_dev *hdev; struct hci_dev_list_req *dl; struct hci_dev_req *dr; - int n = 0, size, err; + int n = 0, err; __u16 dev_num; if (get_user(dev_num, (__u16 __user *) arg)) @@ -810,12 +810,11 @@ int hci_get_dev_list(void __user *arg) if (!dev_num || dev_num > (PAGE_SIZE * 2) / sizeof(*dr)) return -EINVAL; - size = sizeof(*dl) + dev_num * sizeof(*dr); - - dl = kzalloc(size, GFP_KERNEL); + dl = kzalloc(struct_size(dl, dev_req, dev_num), GFP_KERNEL); if (!dl) return -ENOMEM; + dl->dev_num = dev_num; dr = dl->dev_req; read_lock(&hci_dev_list_lock); @@ -838,9 +837,7 @@ int hci_get_dev_list(void __user *arg) read_unlock(&hci_dev_list_lock); dl->dev_num = n; - size = sizeof(*dl) + n * sizeof(*dr); - - err = copy_to_user(arg, dl, size); + err = copy_to_user(arg, dl, struct_size(dl, dev_req, n)); kfree(dl); return err ? -EFAULT : 0; -- cgit v1.2.3 From 7d2c7ddba6238e6a14cd89ef869878dd22f2a661 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 17 May 2024 19:21:49 +0200 Subject: tty: rfcomm: prefer struct_size over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. As the "dl" variable is a pointer to "struct rfcomm_dev_list_req" and this structure ends in a flexible array: struct rfcomm_dev_list_req { [...] struct rfcomm_dev_info dev_info[]; }; the preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the calculation "size + count * size" in the kzalloc() and copy_to_user() functions. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the logic needs a little refactoring to ensure that the "dev_num" member is initialized before the first access to the flex array. Specifically, add the assignment before the list_for_each_entry() loop. Also remove the "size" variable as it is no longer needed. This way, the code is more readable and safer. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/rfcomm.h | 2 +- net/bluetooth/rfcomm/tty.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 99d26879b02a..c05882476900 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,7 @@ struct rfcomm_dev_info { struct rfcomm_dev_list_req { u16 dev_num; - struct rfcomm_dev_info dev_info[]; + struct rfcomm_dev_info dev_info[] __counted_by(dev_num); }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 69c75c041fe1..44b781e7569e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -504,7 +504,7 @@ static int rfcomm_get_dev_list(void __user *arg) struct rfcomm_dev *dev; struct rfcomm_dev_list_req *dl; struct rfcomm_dev_info *di; - int n = 0, size, err; + int n = 0, err; u16 dev_num; BT_DBG(""); @@ -515,12 +515,11 @@ static int rfcomm_get_dev_list(void __user *arg) if (!dev_num || dev_num > (PAGE_SIZE * 4) / sizeof(*di)) return -EINVAL; - size = sizeof(*dl) + dev_num * sizeof(*di); - - dl = kzalloc(size, GFP_KERNEL); + dl = kzalloc(struct_size(dl, dev_info, dev_num), GFP_KERNEL); if (!dl) return -ENOMEM; + dl->dev_num = dev_num; di = dl->dev_info; mutex_lock(&rfcomm_dev_lock); @@ -542,9 +541,7 @@ static int rfcomm_get_dev_list(void __user *arg) mutex_unlock(&rfcomm_dev_lock); dl->dev_num = n; - size = sizeof(*dl) + n * sizeof(*di); - - err = copy_to_user(arg, dl, size); + err = copy_to_user(arg, dl, struct_size(dl, dev_info, n)); kfree(dl); return err ? -EFAULT : 0; -- cgit v1.2.3 From f25b7fd36cc3a850e006aed686f5bbecd200de1b Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Wed, 29 May 2024 08:00:00 +0000 Subject: Bluetooth: Add vendor-specific packet classification for ISO data When HCI raw sockets are opened, the Bluetooth kernel module doesn't track CIS/BIS connections. User-space applications have to identify ISO data by maintaining connection information and look up the mapping for each ACL data packet received. Besides, btsnoop log captured in kernel couldn't tell ISO data from ACL data in this case. To avoid additional lookups, this patch introduces vendor-specific packet classification for Intel BT controllers to distinguish ISO data packets from ACL data packets. Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 25 +++++++++++++++++++++++-- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 16 ++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index 27e03951e68b..ff33e1aa2929 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2549,6 +2549,24 @@ static void btintel_set_dsm_reset_method(struct hci_dev *hdev, data->acpi_reset_method = btintel_acpi_reset_method; } +#define BTINTEL_ISODATA_HANDLE_BASE 0x900 + +static u8 btintel_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb) +{ + /* + * Distinguish ISO data packets form ACL data packets + * based on their connection handle value range. + */ + if (hci_skb_pkt_type(skb) == HCI_ACLDATA_PKT) { + __u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle); + + if (hci_handle(handle) >= BTINTEL_ISODATA_HANDLE_BASE) + return HCI_ISODATA_PKT; + } + + return hci_skb_pkt_type(skb); +} + int btintel_bootloader_setup_tlv(struct hci_dev *hdev, struct intel_version_tlv *ver) { @@ -2989,11 +3007,14 @@ static int btintel_setup_combined(struct hci_dev *hdev) err = btintel_bootloader_setup(hdev, &ver); btintel_register_devcoredump_support(hdev); break; + case 0x18: /* GfP2 */ + case 0x1c: /* GaP */ + /* Re-classify packet type for controllers with LE audio */ + hdev->classify_pkt_type = btintel_classify_pkt_type; + fallthrough; case 0x17: - case 0x18: case 0x19: case 0x1b: - case 0x1c: case 0x1e: /* Display version information of TLV type */ btintel_version_info_tlv(hdev, &ver_tlv); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c43716edf205..f7de2681d457 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -649,6 +649,7 @@ struct hci_dev { int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type, struct bt_codec *codec, __u8 *vnd_len, __u8 **vnd_data); + u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 55bdc365916f..144e85ebfbdb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2909,15 +2909,31 @@ int hci_reset_dev(struct hci_dev *hdev) } EXPORT_SYMBOL(hci_reset_dev); +static u8 hci_dev_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb) +{ + if (hdev->classify_pkt_type) + return hdev->classify_pkt_type(hdev, skb); + + return hci_skb_pkt_type(skb); +} + /* Receive frame from HCI drivers */ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) { + u8 dev_pkt_type; + if (!hdev || (!test_bit(HCI_UP, &hdev->flags) && !test_bit(HCI_INIT, &hdev->flags))) { kfree_skb(skb); return -ENXIO; } + /* Check if the driver agree with packet type classification */ + dev_pkt_type = hci_dev_classify_pkt_type(hdev, skb); + if (hci_skb_pkt_type(skb) != dev_pkt_type) { + hci_skb_pkt_type(skb) = dev_pkt_type; + } + switch (hci_skb_pkt_type(skb)) { case HCI_EVENT_PKT: break; -- cgit v1.2.3 From da63f331353c9e1e6dc29e49e28f8f4fe5d642fd Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 18 Jun 2024 21:59:32 +0300 Subject: Bluetooth: hci_core, hci_sync: cleanup struct discovery_state After commit 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan work"), 'scan_start' and 'scan_duration' of 'struct discovery_state' are still initialized but actually unused. So remove the aforementioned fields and adjust 'hci_discovery_filter_clear()' and 'le_scan_disable()' accordingly. Compile tested only. Fixes: 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan work") Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ---- net/bluetooth/hci_sync.c | 2 -- 2 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f7de2681d457..eaeaf3dc07aa 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -91,8 +91,6 @@ struct discovery_state { s8 rssi; u16 uuid_count; u8 (*uuids)[16]; - unsigned long scan_start; - unsigned long scan_duration; unsigned long name_resolve_timeout; }; @@ -891,8 +889,6 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; - hdev->discovery.scan_start = 0; - hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 82db6092965b..ccad43f10415 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -371,8 +371,6 @@ static void le_scan_disable(struct work_struct *work) goto _return; } - hdev->discovery.scan_start = 0; - /* If we were running LE only scan, change discovery state. If * we were running both LE and BR/EDR inquiry simultaneously, * and BR/EDR inquiry is already finished, stop discovery, -- cgit v1.2.3 From 2c1583290b08c5aa9005178a573be8f329de2976 Mon Sep 17 00:00:00 2001 From: "Kamil Horák (2N)" Date: Fri, 12 Jul 2024 17:07:06 +0200 Subject: net: phy: bcm54811: New link mode for BroadR-Reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new link mode necessary for 10 MBit single-pair connection in BroadR-Reach mode on bcm5481x PHY by Broadcom. This new link mode, 10baseT1BRR, is known as 1BR10 in the Broadcom terminology. Another link mode to be used is 1BR100 and it is already present as 100baseT1, because Broadcom's 1BR100 became 100baseT1 (IEEE 802.3bw). Signed-off-by: Kamil Horák (2N) Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240712150709.3134474-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-core.c | 3 ++- include/uapi/linux/ethtool.h | 1 + net/ethtool/common.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index a235ea2264a7..1f98b6a96c15 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -13,7 +13,7 @@ */ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 102, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -266,6 +266,7 @@ static const struct phy_setting settings[] = { PHY_SETTING( 10, FULL, 10baseT1S_Full ), PHY_SETTING( 10, HALF, 10baseT1S_Half ), PHY_SETTING( 10, HALF, 10baseT1S_P2MP_Half ), + PHY_SETTING( 10, FULL, 10baseT1BRR_Full ), }; #undef PHY_SETTING diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 230110b97029..4a0a6e703483 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2054,6 +2054,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Full_BIT = 99, ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, + ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 67d06cd002a5..5f99a32e4fe5 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -211,6 +211,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T1S, Full), __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), + __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -251,6 +252,7 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1S_P2MP 1 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 +#define __LINK_MODE_LANES_T1BRR 1 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ @@ -374,6 +376,7 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T1S, Full), __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), + __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); -- cgit v1.2.3 From ff253875ff3bab75214c4e94352956dc02a0d965 Mon Sep 17 00:00:00 2001 From: "Kamil Horák (2N)" Date: Fri, 12 Jul 2024 17:07:07 +0200 Subject: net: phy: bcm54811: Add LRE registers definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the definitions of LRE registers for Broadcom BCM5481x PHY Signed-off-by: Kamil Horák (2N) Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240712150709.3134474-3-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/linux/brcmphy.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'include') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 1394ba302367..028b3e00378e 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -271,12 +271,100 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ +/* BroadR-Reach LRE Registers. */ +#define MII_BCM54XX_LRECR 0x00 /* LRE Control Register */ +#define MII_BCM54XX_LRESR 0x01 /* LRE Status Register */ +#define MII_BCM54XX_LREPHYSID1 0x02 /* LRE PHYS ID 1 */ +#define MII_BCM54XX_LREPHYSID2 0x03 /* LRE PHYS ID 2 */ +#define MII_BCM54XX_LREANAA 0x04 /* LDS Auto-Negotiation Advertised Ability */ +#define MII_BCM54XX_LREANAC 0x05 /* LDS Auto-Negotiation Advertised Control */ +#define MII_BCM54XX_LREANPT 0x06 /* LDS Ability Next Page Transmit */ +#define MII_BCM54XX_LRELPA 0x07 /* LDS Link Partner Ability */ +#define MII_BCM54XX_LRELPNPM 0x08 /* LDS Link Partner Next Page Message */ +#define MII_BCM54XX_LRELPNPC 0x09 /* LDS Link Partner Next Page Control */ +#define MII_BCM54XX_LRELDSE 0x0a /* LDS Expansion Register */ +#define MII_BCM54XX_LREES 0x0f /* LRE Extended Status */ + +/* LRE control register. */ +#define LRECR_RESET 0x8000 /* Reset to default state */ +#define LRECR_LOOPBACK 0x4000 /* Internal Loopback */ +#define LRECR_LDSRES 0x2000 /* Restart LDS Process */ +#define LRECR_LDSEN 0x1000 /* LDS Enable */ +#define LRECR_PDOWN 0x0800 /* Enable low power state */ +#define LRECR_ISOLATE 0x0400 /* Isolate data paths from MII */ +#define LRECR_SPEED100 0x0200 /* Select 100 Mbps */ +#define LRECR_SPEED10 0x0000 /* Select 10 Mbps */ +#define LRECR_4PAIRS 0x0020 /* Select 4 Pairs */ +#define LRECR_2PAIRS 0x0010 /* Select 2 Pairs */ +#define LRECR_1PAIR 0x0000 /* Select 1 Pair */ +#define LRECR_MASTER 0x0008 /* Force Master when LDS disabled */ +#define LRECR_SLAVE 0x0000 /* Force Slave when LDS disabled */ + +/* LRE status register. */ +#define LRESR_100_1PAIR 0x2000 /* Can do 100Mbps 1 Pair */ +#define LRESR_100_4PAIR 0x1000 /* Can do 100Mbps 4 Pairs */ +#define LRESR_100_2PAIR 0x0800 /* Can do 100Mbps 2 Pairs */ +#define LRESR_10_2PAIR 0x0400 /* Can do 10Mbps 2 Pairs */ +#define LRESR_10_1PAIR 0x0200 /* Can do 10Mbps 1 Pair */ +#define LRESR_ESTATEN 0x0100 /* Extended Status in R15 */ +#define LRESR_RESV 0x0080 /* Unused... */ +#define LRESR_MFPS 0x0040 /* Can suppress Management Frames Preamble */ +#define LRESR_LDSCOMPLETE 0x0020 /* LDS Auto-negotiation complete */ +#define LRESR_8023 0x0010 /* Has IEEE 802.3 Support */ +#define LRESR_LDSABILITY 0x0008 /* LDS auto-negotiation capable */ +#define LRESR_LSTATUS 0x0004 /* Link status */ +#define LRESR_JCD 0x0002 /* Jabber detected */ +#define LRESR_ERCAP 0x0001 /* Ext-reg capability */ + +/* LDS Auto-Negotiation Advertised Ability. */ +#define LREANAA_PAUSE_ASYM 0x8000 /* Can pause asymmetrically */ +#define LREANAA_PAUSE 0x4000 /* Can pause */ +#define LREANAA_100_1PAIR 0x0020 /* Can do 100Mbps 1 Pair */ +#define LREANAA_100_4PAIR 0x0010 /* Can do 100Mbps 4 Pair */ +#define LREANAA_100_2PAIR 0x0008 /* Can do 100Mbps 2 Pair */ +#define LREANAA_10_2PAIR 0x0004 /* Can do 10Mbps 2 Pair */ +#define LREANAA_10_1PAIR 0x0002 /* Can do 10Mbps 1 Pair */ + +#define LRE_ADVERTISE_FULL (LREANAA_100_1PAIR | LREANAA_100_4PAIR | \ + LREANAA_100_2PAIR | LREANAA_10_2PAIR | \ + LREANAA_10_1PAIR) + +#define LRE_ADVERTISE_ALL LRE_ADVERTISE_FULL + +/* LDS Link Partner Ability. */ +#define LRELPA_PAUSE_ASYM 0x8000 /* Supports asymmetric pause */ +#define LRELPA_PAUSE 0x4000 /* Supports pause capability */ +#define LRELPA_100_1PAIR 0x0020 /* 100Mbps 1 Pair capable */ +#define LRELPA_100_4PAIR 0x0010 /* 100Mbps 4 Pair capable */ +#define LRELPA_100_2PAIR 0x0008 /* 100Mbps 2 Pair capable */ +#define LRELPA_10_2PAIR 0x0004 /* 10Mbps 2 Pair capable */ +#define LRELPA_10_1PAIR 0x0002 /* 10Mbps 1 Pair capable */ + +/* LDS Expansion register. */ +#define LDSE_DOWNGRADE 0x8000 /* Can do LDS Speed Downgrade */ +#define LDSE_MASTER 0x4000 /* Master / Slave */ +#define LDSE_PAIRS_MASK 0x3000 /* Pair Count Mask */ +#define LDSE_PAIRS_SHIFT 12 +#define LDSE_4PAIRS (2 << LDSE_PAIRS_SHIFT) /* 4 Pairs Connection */ +#define LDSE_2PAIRS (1 << LDSE_PAIRS_SHIFT) /* 2 Pairs Connection */ +#define LDSE_1PAIR (0 << LDSE_PAIRS_SHIFT) /* 1 Pair Connection */ +#define LDSE_CABLEN_MASK 0x0FFF /* Cable Length Mask */ + /* BCM54810 Registers */ #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL (MII_BCM54XX_EXP_SEL_ER + 0x90) #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) #define BCM54810_SHD_CLK_CTL 0x3 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) +/* BCM54811 Registers */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL (MII_BCM54XX_EXP_SEL_ER + 0x9A) +/* Access Control Override Enable */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_EN BIT(15) +/* Access Control Override Value */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_OVERRIDE_VAL BIT(14) +/* Access Control Value */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_VAL BIT(13) + /* BCM54612E Registers */ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) #define BCM54612E_LED4_CLK125OUT_EN (1 << 1) -- cgit v1.2.3 From 6c87e1a4792804efce8ab3dfdb6e9ada314ec6dd Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:13 +0200 Subject: net: dsa: vsc73xx: introduce tag 8021q for vsc73xx This commit introduces a new tagger based on 802.1q tagging. It's designed for the vsc73xx driver. The VSC73xx family doesn't have any tag support for the RGMII port, but it could be based on VLANs. Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20240713211620.1125910-8-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 ++ net/dsa/Kconfig | 6 ++++ net/dsa/Makefile | 1 + net/dsa/tag_vsc73xx_8021q.c | 68 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 net/dsa/tag_vsc73xx_8021q.c (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index f9ae3ca66b6f..5a5a03a7b4c3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -53,6 +53,7 @@ struct tc_action; #define DSA_TAG_PROTO_RTL8_4T_VALUE 25 #define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 #define DSA_TAG_PROTO_LAN937X_VALUE 27 +#define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -83,6 +84,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, + DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, }; struct dsa_switch; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 8d5bf869eb14..2dfe9063613f 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -166,6 +166,12 @@ config NET_DSA_TAG_TRAILER Say Y or M if you want to enable support for tagging frames at with a trailed. e.g. Marvell 88E6060. +config NET_DSA_TAG_VSC73XX_8021Q + tristate "Tag driver for Microchip/Vitesse VSC73xx family of switches, using VLAN" + help + Say Y or M if you want to enable support for tagging frames with a + custom VLAN-based header. + config NET_DSA_TAG_XRS700X tristate "Tag driver for XRS700x switches" help diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8a1894a42552..555c07cfeb71 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NET_DSA_TAG_RTL8_4) += tag_rtl8_4.o obj-$(CONFIG_NET_DSA_TAG_RZN1_A5PSW) += tag_rzn1_a5psw.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +obj-$(CONFIG_NET_DSA_TAG_VSC73XX_8021Q) += tag_vsc73xx_8021q.o obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o # for tracing framework to find trace.h diff --git a/net/dsa/tag_vsc73xx_8021q.c b/net/dsa/tag_vsc73xx_8021q.c new file mode 100644 index 000000000000..af121a9aff7f --- /dev/null +++ b/net/dsa/tag_vsc73xx_8021q.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* Copyright (C) 2024 Pawel Dembicki + */ +#include + +#include "tag.h" +#include "tag_8021q.h" + +#define VSC73XX_8021Q_NAME "vsc73xx-8021q" + +static struct sk_buff * +vsc73xx_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct dsa_port *dp = dsa_user_to_port(netdev); + u16 queue_mapping = skb_get_queue_mapping(skb); + u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); + u8 pcp; + + if (skb->offload_fwd_mark) { + unsigned int bridge_num = dsa_port_bridge_num_get(dp); + struct net_device *br = dsa_port_bridge_dev_get(dp); + + if (br_vlan_enabled(br)) + return skb; + + tx_vid = dsa_tag_8021q_bridge_vid(bridge_num); + } + + pcp = netdev_txq_to_tc(netdev, queue_mapping); + + return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); +} + +static struct sk_buff * +vsc73xx_rcv(struct sk_buff *skb, struct net_device *netdev) +{ + int src_port = -1, switch_id = -1, vbid = -1, vid = -1; + + dsa_8021q_rcv(skb, &src_port, &switch_id, &vbid, &vid); + + skb->dev = dsa_tag_8021q_find_user(netdev, src_port, switch_id, + vid, vbid); + if (!skb->dev) { + dev_warn_ratelimited(&netdev->dev, + "Couldn't decode source port\n"); + return NULL; + } + + dsa_default_offload_fwd_mark(skb); + + return skb; +} + +static const struct dsa_device_ops vsc73xx_8021q_netdev_ops = { + .name = VSC73XX_8021Q_NAME, + .proto = DSA_TAG_PROTO_VSC73XX_8021Q, + .xmit = vsc73xx_xmit, + .rcv = vsc73xx_rcv, + .needed_headroom = VLAN_HLEN, + .promisc_on_conduit = true, +}; + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("DSA tag driver for VSC73XX family of switches, using VLAN"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_VSC73XX_8021Q, VSC73XX_8021Q_NAME); + +module_dsa_tag_driver(vsc73xx_8021q_netdev_ops); -- cgit v1.2.3 From ce20fdd670ac375a4e3dff91c2888ad9ff9eef56 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:15 +0200 Subject: net: dsa: Define max num of bridges in tag8021q implementation Max number of bridges in tag8021q implementation is strictly limited by VBID size: 3 bits. But zero is reserved and only 7 values can be used. This patch adds define which describe maximum possible value. Suggested-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Florian Fainelli Reviewed-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20240713211620.1125910-10-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 3 +-- include/linux/dsa/8021q.h | 5 +++++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index ee0fb1c343f1..0c55a29d7dd3 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3167,8 +3167,7 @@ static int sja1105_setup(struct dsa_switch *ds) ds->vlan_filtering_is_global = true; ds->untag_bridge_pvid = true; ds->fdb_isolation = true; - /* tag_8021q has 3 bits for the VBID, and the value 0 is reserved */ - ds->max_num_bridges = 7; + ds->max_num_bridges = DSA_TAG_8021Q_MAX_NUM_BRIDGES; /* Advertise the 8 egress queues */ ds->num_tx_queues = SJA1105_NUM_TC; diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index f3664ee12170..1dda2a13b832 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -8,6 +8,11 @@ #include #include +/* VBID is limited to three bits only and zero is reserved. + * Only 7 bridges can be enumerated. + */ +#define DSA_TAG_8021Q_MAX_NUM_BRIDGES 7 + int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); -- cgit v1.2.3 From 85aabd1fe9d6af4dc5d11a2d8be567ec45d1dc5e Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:16 +0200 Subject: net: dsa: prepare 'dsa_tag_8021q_bridge_join' for standalone use The 'dsa_tag_8021q_bridge_join' could be used as a generic implementation of the 'ds->ops->port_bridge_join()' function. However, it is necessary to synchronize their arguments. This patch also moves the 'tx_fwd_offload' flag configuration line into 'dsa_tag_8021q_bridge_join' body. Currently, every (sja1105) driver sets it, and the future vsc73xx implementation will also need it for simplification. Suggested-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20240713211620.1125910-11-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 5 ++--- include/linux/dsa/8021q.h | 3 ++- net/dsa/tag_8021q.c | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 0c55a29d7dd3..c7282ce3d11c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2133,14 +2133,13 @@ static int sja1105_bridge_join(struct dsa_switch *ds, int port, if (rc) return rc; - rc = dsa_tag_8021q_bridge_join(ds, port, bridge); + rc = dsa_tag_8021q_bridge_join(ds, port, bridge, tx_fwd_offload, + extack); if (rc) { sja1105_bridge_member(ds, port, bridge, false); return rc; } - *tx_fwd_offload = true; - return 0; } diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 1dda2a13b832..d13aabdeb4b2 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -18,7 +18,8 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index c0eee113a2b9..3ee53e28ec2e 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -286,7 +286,8 @@ int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds, * be used for VLAN-unaware bridging. */ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge) + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_to_port(ds, port); u16 standalone_vid, bridge_vid; @@ -304,6 +305,8 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, dsa_port_tag_8021q_vlan_del(dp, standalone_vid, false); + *tx_fwd_offload = true; + return 0; } EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_join); -- cgit v1.2.3 From 3ba74b2f288bbc17c0c2a58ab219e1df19f80153 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 21 Jun 2024 16:01:55 +0300 Subject: Bluetooth: hci_core: cleanup struct hci_dev Remove unused and set but otherwise unused 'discovery_old_state' and 'sco_last_tx' members of 'struct hci_dev'. The first one is a leftover after commit 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier"); the second one is originated from ancient 2.4.19 and I was unable to find any actual use since that. Signed-off-by: Dmitry Antipov Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 2 -- net/bluetooth/hci_sync.c | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index eaeaf3dc07aa..31020891fc68 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -476,7 +476,6 @@ struct hci_dev { unsigned int iso_pkts; unsigned long acl_last_tx; - unsigned long sco_last_tx; unsigned long le_last_tx; __u8 le_tx_def_phys; @@ -528,7 +527,6 @@ struct hci_dev { struct discovery_state discovery; - int discovery_old_state; bool discovery_paused; int advertising_old_state; bool advertising_paused; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index ccad43f10415..76b283b8e90d 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5853,7 +5853,6 @@ static int hci_pause_discovery_sync(struct hci_dev *hdev) return err; hdev->discovery_paused = true; - hdev->discovery_old_state = old_state; hci_discovery_set_state(hdev, DISCOVERY_STOPPED); return 0; -- cgit v1.2.3 From 92048ab2e2e6cc90ad1cc9f55deb5cec4d731793 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:00:08 -0400 Subject: Bluetooth: hci_core: Remove usage of hci_req_sync hci_request functions are considered deprecated so this replaces the usage of hci_req_sync with hci_inquiry_sync. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 ++ net/bluetooth/hci_core.c | 29 ++++------------------------- net/bluetooth/hci_sync.c | 10 +++++----- 3 files changed, 11 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 20168732f20e..620e6014beb2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -131,6 +131,8 @@ int hci_update_discoverable(struct hci_dev *hdev); int hci_update_connectable_sync(struct hci_dev *hdev); +int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp); + int hci_start_discovery_sync(struct hci_dev *hdev); int hci_stop_discovery_sync(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db9b9bb7d74d..cce755a84ea7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -312,33 +312,12 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf) return copied; } -static int hci_inq_req(struct hci_request *req, unsigned long opt) -{ - struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; - struct hci_dev *hdev = req->hdev; - struct hci_cp_inquiry cp; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_INQUIRY, &hdev->flags)) - return 0; - - /* Start Inquiry */ - memcpy(&cp.lap, &ir->lap, 3); - cp.length = ir->length; - cp.num_rsp = ir->num_rsp; - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return 0; -} - int hci_inquiry(void __user *arg) { __u8 __user *ptr = arg; struct hci_inquiry_req ir; struct hci_dev *hdev; int err = 0, do_inquiry = 0, max_rsp; - long timeo; __u8 *buf; if (copy_from_user(&ir, ptr, sizeof(ir))) @@ -377,11 +356,11 @@ int hci_inquiry(void __user *arg) } hci_dev_unlock(hdev); - timeo = ir.length * msecs_to_jiffies(2000); - if (do_inquiry) { - err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, - timeo, NULL); + hci_req_sync_lock(hdev); + err = hci_inquiry_sync(hdev, ir.length, ir.num_rsp); + hci_req_sync_unlock(hdev); + if (err < 0) goto done; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 76b283b8e90d..46b394842a27 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -347,10 +347,9 @@ static int scan_disable_sync(struct hci_dev *hdev, void *data) return hci_scan_disable_sync(hdev); } -static int hci_inquiry_sync(struct hci_dev *hdev, u8 length); static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data) { - return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); + return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN, 0); } static void le_scan_disable(struct work_struct *work) @@ -5662,7 +5661,7 @@ int hci_update_connectable_sync(struct hci_dev *hdev) return hci_update_passive_scan_sync(hdev); } -static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) +int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp) { const u8 giac[3] = { 0x33, 0x8b, 0x9e }; const u8 liac[3] = { 0x00, 0x8b, 0x9e }; @@ -5685,6 +5684,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length) memcpy(&cp.lap, giac, sizeof(cp.lap)); cp.length = length; + cp.num_rsp = num_rsp; return __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp, HCI_CMD_TIMEOUT); @@ -5771,7 +5771,7 @@ static int hci_start_interleaved_discovery_sync(struct hci_dev *hdev) if (err) return err; - return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN); + return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0); } int hci_start_discovery_sync(struct hci_dev *hdev) @@ -5783,7 +5783,7 @@ int hci_start_discovery_sync(struct hci_dev *hdev) switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: - return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN); + return hci_inquiry_sync(hdev, DISCOV_BREDR_INQUIRY_LEN, 0); case DISCOV_TYPE_INTERLEAVED: /* When running simultaneous discovery, the LE scanning time * should occupy the whole discovery time sine BR/EDR inquiry -- cgit v1.2.3 From 176cbeceb5c5a740216a6be3e751e76aaddf94b9 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:13:56 -0400 Subject: Bluetooth: hci_core: Don't use hci_prepare_cmd This replaces the instance of hci_prepare_cmd with hci_cmd_sync_alloc since the former is part of hci_request.c which is considered deprecated. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 4 ++++ net/bluetooth/hci_core.c | 4 ++-- net/bluetooth/hci_sync.c | 5 ++--- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 620e6014beb2..a8d88247ac89 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -20,6 +20,10 @@ struct hci_cmd_sync_work_entry { }; struct adv_info; + +struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, struct sock *sk); + /* Function with sync suffix shall not be called with hdev->lock held as they * wait the command to complete and in the meantime an event could be received * which could attempt to acquire hdev->lock causing a deadlock. diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index cce755a84ea7..9bdd3e1f8cfc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3054,7 +3054,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - skb = hci_prepare_cmd(hdev, opcode, plen, param); + skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL); if (!skb) { bt_dev_err(hdev, "no memory for command"); return -ENOMEM; @@ -3089,7 +3089,7 @@ int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, return -EINVAL; } - skb = hci_prepare_cmd(hdev, opcode, plen, param); + skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL); if (!skb) { bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", opcode); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 46b394842a27..0c8888448d3c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -49,9 +49,8 @@ static void hci_cmd_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, wake_up_interruptible(&hdev->req_wait_q); } -static struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, - u32 plen, const void *param, - struct sock *sk) +struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, struct sock *sk) { int len = HCI_COMMAND_HDR_SIZE + plen; struct hci_command_hdr *hdr; -- cgit v1.2.3 From f2d89775358606c7ab6b6b6c4a02fe1e8cd270b1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:52:57 -0400 Subject: Bluetooth: hci_sync: Remove remaining dependencies of hci_request This removes the dependencies of hci_req_init and hci_request_cancel_all from hci_sync.c. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 17 +++++++++++++++++ net/bluetooth/hci_request.h | 17 ----------------- net/bluetooth/hci_sync.c | 14 +++++++++++--- 3 files changed, 28 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index a8d88247ac89..75e052909b5f 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -8,6 +8,23 @@ #define UINT_PTR(_handle) ((void *)((uintptr_t)_handle)) #define PTR_UINT(_ptr) ((uintptr_t)((void *)_ptr)) +#define HCI_REQ_DONE 0 +#define HCI_REQ_PEND 1 +#define HCI_REQ_CANCELED 2 + +#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) +#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; +}; + typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data); typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data, int err); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index c91f2838f542..b730da4a8b47 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -22,23 +22,6 @@ #include -#define HCI_REQ_DONE 0 -#define HCI_REQ_PEND 1 -#define HCI_REQ_CANCELED 2 - -#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) -#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) - -struct hci_request { - struct hci_dev *hdev; - struct sk_buff_head cmd_q; - - /* If something goes wrong when building the HCI request, the error - * value is stored in this field. - */ - int err; -}; - void hci_req_init(struct hci_request *req, struct hci_dev *hdev); void hci_req_purge(struct hci_request *req); bool hci_req_status_pend(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 63548c4cc1ac..cd2ed16da8a4 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -12,7 +12,6 @@ #include #include -#include "hci_request.h" #include "hci_codec.h" #include "hci_debugfs.h" #include "smp.h" @@ -146,6 +145,13 @@ static int hci_cmd_sync_run(struct hci_request *req) return 0; } +static void hci_request_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + /* This function requires the caller holds hdev->req_lock. */ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, @@ -157,7 +163,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, bt_dev_dbg(hdev, "Opcode 0x%4.4x", opcode); - hci_req_init(&req, hdev); + hci_request_init(&req, hdev); hci_cmd_sync_add(&req, opcode, plen, param, event, sk); @@ -5056,7 +5062,9 @@ int hci_dev_close_sync(struct hci_dev *hdev) cancel_delayed_work(&hdev->ncmd_timer); cancel_delayed_work(&hdev->le_scan_disable); - hci_request_cancel_all(hdev); + hci_cmd_sync_cancel_sync(hdev, ENODEV); + + cancel_interleave_scan(hdev); if (hdev->adv_instance_timeout) { cancel_delayed_work_sync(&hdev->adv_instance_expire); -- cgit v1.2.3 From 936daee9cf08c5e58c9a0fe687f52adb2d80e87d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 17:10:41 -0400 Subject: Bluetooth: Remove hci_request.{c,h} This removes hci_request.{c,h} since it shall no longer be used. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 4 + net/bluetooth/Makefile | 3 +- net/bluetooth/hci_conn.c | 1 - net/bluetooth/hci_core.c | 1 - net/bluetooth/hci_debugfs.c | 1 - net/bluetooth/hci_event.c | 1 - net/bluetooth/hci_request.c | 903 -------------------------------------- net/bluetooth/hci_request.h | 54 --- net/bluetooth/mgmt.c | 1 - net/bluetooth/msft.c | 1 - 10 files changed, 5 insertions(+), 965 deletions(-) delete mode 100644 net/bluetooth/hci_request.c delete mode 100644 net/bluetooth/hci_request.h (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index b3228bd6cd6b..5d655e109b2c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -441,6 +441,10 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, + hci_req_complete_t *req_complete, + hci_req_complete_skb_t *req_complete_skb); + #define HCI_REQ_START BIT(0) #define HCI_REQ_SKB BIT(1) diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 628d448d78be..5a3835b7dfcd 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -14,8 +14,7 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \ - ecdh_helper.o hci_request.o mgmt_util.o mgmt_config.o hci_codec.o \ - eir.o hci_sync.o + ecdh_helper.o mgmt_util.o mgmt_config.o hci_codec.o eir.o hci_sync.o bluetooth-$(CONFIG_DEV_COREDUMP) += coredump.o diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 080053a85b4d..8e48ccd2af30 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -34,7 +34,6 @@ #include #include -#include "hci_request.h" #include "smp.h" #include "eir.h" diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fc0424ae551e..8a4ebd93adfc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -40,7 +40,6 @@ #include #include -#include "hci_request.h" #include "hci_debugfs.h" #include "smp.h" #include "leds.h" diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index ce3ff2fa72e5..f625074d1f00 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -28,7 +28,6 @@ #include #include "smp.h" -#include "hci_request.h" #include "hci_debugfs.h" #define DEFINE_QUIRK_ATTRIBUTE(__name, __quirk) \ diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4611a67d7dcc..dce8035ca799 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -33,7 +33,6 @@ #include #include -#include "hci_request.h" #include "hci_debugfs.h" #include "hci_codec.h" #include "smp.h" diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c deleted file mode 100644 index efea25eb56ce..000000000000 --- a/net/bluetooth/hci_request.c +++ /dev/null @@ -1,903 +0,0 @@ -/* - BlueZ - Bluetooth protocol stack for Linux - - Copyright (C) 2014 Intel Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - -#include - -#include -#include -#include - -#include "smp.h" -#include "hci_request.h" -#include "msft.h" -#include "eir.h" - -void hci_req_init(struct hci_request *req, struct hci_dev *hdev) -{ - skb_queue_head_init(&req->cmd_q); - req->hdev = hdev; - req->err = 0; -} - -void hci_req_purge(struct hci_request *req) -{ - skb_queue_purge(&req->cmd_q); -} - -bool hci_req_status_pend(struct hci_dev *hdev) -{ - return hdev->req_status == HCI_REQ_PEND; -} - -static int req_run(struct hci_request *req, hci_req_complete_t complete, - hci_req_complete_skb_t complete_skb) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - unsigned long flags; - - bt_dev_dbg(hdev, "length %u", skb_queue_len(&req->cmd_q)); - - /* If an error occurred during request building, remove all HCI - * commands queued on the HCI request queue. - */ - if (req->err) { - skb_queue_purge(&req->cmd_q); - return req->err; - } - - /* Do not allow empty requests */ - if (skb_queue_empty(&req->cmd_q)) - return -ENODATA; - - skb = skb_peek_tail(&req->cmd_q); - if (complete) { - bt_cb(skb)->hci.req_complete = complete; - } else if (complete_skb) { - bt_cb(skb)->hci.req_complete_skb = complete_skb; - bt_cb(skb)->hci.req_flags |= HCI_REQ_SKB; - } - - spin_lock_irqsave(&hdev->cmd_q.lock, flags); - skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); - spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - - queue_work(hdev->workqueue, &hdev->cmd_work); - - return 0; -} - -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) -{ - return req_run(req, complete, NULL); -} - -int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete) -{ - return req_run(req, NULL, complete); -} - -void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb) -{ - bt_dev_dbg(hdev, "result 0x%2.2x", result); - - if (hdev->req_status == HCI_REQ_PEND) { - hdev->req_result = result; - hdev->req_status = HCI_REQ_DONE; - if (skb) { - kfree_skb(hdev->req_skb); - hdev->req_skb = skb_get(skb); - } - wake_up_interruptible(&hdev->req_wait_q); - } -} - -/* Execute request and wait for completion. */ -int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status) -{ - struct hci_request req; - int err = 0; - - bt_dev_dbg(hdev, "start"); - - hci_req_init(&req, hdev); - - hdev->req_status = HCI_REQ_PEND; - - err = func(&req, opt); - if (err) { - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - return err; - } - - err = hci_req_run_skb(&req, hci_req_sync_complete); - if (err < 0) { - hdev->req_status = 0; - - /* ENODATA means the HCI request command queue is empty. - * This can happen when a request with conditionals doesn't - * trigger any commands to be sent. This is normal behavior - * and should not trigger an error return. - */ - if (err == -ENODATA) { - if (hci_status) - *hci_status = 0; - return 0; - } - - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - - return err; - } - - err = wait_event_interruptible_timeout(hdev->req_wait_q, - hdev->req_status != HCI_REQ_PEND, timeout); - - if (err == -ERESTARTSYS) - return -EINTR; - - switch (hdev->req_status) { - case HCI_REQ_DONE: - err = -bt_to_errno(hdev->req_result); - if (hci_status) - *hci_status = hdev->req_result; - break; - - case HCI_REQ_CANCELED: - err = -hdev->req_result; - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - break; - - default: - err = -ETIMEDOUT; - if (hci_status) - *hci_status = HCI_ERROR_UNSPECIFIED; - break; - } - - kfree_skb(hdev->req_skb); - hdev->req_skb = NULL; - hdev->req_status = hdev->req_result = 0; - - bt_dev_dbg(hdev, "end: err %d", err); - - return err; -} - -int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status) -{ - int ret; - - /* Serialize all requests */ - hci_req_sync_lock(hdev); - /* check the state after obtaing the lock to protect the HCI_UP - * against any races from hci_dev_do_close when the controller - * gets removed. - */ - if (test_bit(HCI_UP, &hdev->flags)) - ret = __hci_req_sync(hdev, req, opt, timeout, hci_status); - else - ret = -ENETDOWN; - hci_req_sync_unlock(hdev); - - return ret; -} - -struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param) -{ - int len = HCI_COMMAND_HDR_SIZE + plen; - struct hci_command_hdr *hdr; - struct sk_buff *skb; - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return NULL; - - hdr = skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(opcode); - hdr->plen = plen; - - if (plen) - skb_put_data(skb, param, plen); - - bt_dev_dbg(hdev, "skb len %d", skb->len); - - hci_skb_pkt_type(skb) = HCI_COMMAND_PKT; - hci_skb_opcode(skb) = opcode; - - return skb; -} - -/* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - - bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); - - /* If an error occurred during request building, there is no point in - * queueing the HCI command. We can simply return. - */ - if (req->err) - return; - - skb = hci_prepare_cmd(hdev, opcode, plen, param); - if (!skb) { - bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", - opcode); - req->err = -ENOMEM; - return; - } - - if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->hci.req_flags |= HCI_REQ_START; - - hci_skb_event(skb) = event; - - skb_queue_tail(&req->cmd_q, skb); -} - -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param) -{ - bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode); - hci_req_add_ev(req, opcode, plen, param, 0); -} - -static void start_interleave_scan(struct hci_dev *hdev) -{ - hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; - queue_delayed_work(hdev->req_workqueue, - &hdev->interleave_scan, 0); -} - -static bool is_interleave_scanning(struct hci_dev *hdev) -{ - return hdev->interleave_scan_state != INTERLEAVE_SCAN_NONE; -} - -static void cancel_interleave_scan(struct hci_dev *hdev) -{ - bt_dev_dbg(hdev, "cancelling interleave scan"); - - cancel_delayed_work_sync(&hdev->interleave_scan); - - hdev->interleave_scan_state = INTERLEAVE_SCAN_NONE; -} - -/* Return true if interleave_scan wasn't started until exiting this function, - * otherwise, return false - */ -static bool __hci_update_interleaved_scan(struct hci_dev *hdev) -{ - /* Do interleaved scan only if all of the following are true: - * - There is at least one ADV monitor - * - At least one pending LE connection or one device to be scanned for - * - Monitor offloading is not supported - * If so, we should alternate between allowlist scan and one without - * any filters to save power. - */ - bool use_interleaving = hci_is_adv_monitoring(hdev) && - !(list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) && - hci_get_adv_monitor_offload_ext(hdev) == - HCI_ADV_MONITOR_EXT_NONE; - bool is_interleaving = is_interleave_scanning(hdev); - - if (use_interleaving && !is_interleaving) { - start_interleave_scan(hdev); - bt_dev_dbg(hdev, "starting interleave scan"); - return true; - } - - if (!use_interleaving && is_interleaving) - cancel_interleave_scan(hdev); - - return false; -} - -void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) -{ - struct hci_dev *hdev = req->hdev; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return; - } - - if (use_ext_scan(hdev)) { - struct hci_cp_le_set_ext_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp), - &cp); - } else { - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - } - - /* Disable address resolution */ - if (hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION) && !rpa_le_conn) { - __u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); - } -} - -static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr, - u8 bdaddr_type) -{ - struct hci_cp_le_del_from_accept_list cp; - - cp.bdaddr_type = bdaddr_type; - bacpy(&cp.bdaddr, bdaddr); - - bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from accept list", &cp.bdaddr, - cp.bdaddr_type); - hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp); - - if (use_ll_privacy(req->hdev)) { - struct smp_irk *irk; - - irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type); - if (irk) { - struct hci_cp_le_del_from_resolv_list cp; - - cp.bdaddr_type = bdaddr_type; - bacpy(&cp.bdaddr, bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_RESOLV_LIST, - sizeof(cp), &cp); - } - } -} - -/* Adds connection to accept list if needed. On error, returns -1. */ -static int add_to_accept_list(struct hci_request *req, - struct hci_conn_params *params, u8 *num_entries, - bool allow_rpa) -{ - struct hci_cp_le_add_to_accept_list cp; - struct hci_dev *hdev = req->hdev; - - /* Already in accept list */ - if (hci_bdaddr_list_lookup(&hdev->le_accept_list, ¶ms->addr, - params->addr_type)) - return 0; - - /* Select filter policy to accept all advertising */ - if (*num_entries >= hdev->le_accept_list_size) - return -1; - - /* Accept list can not be used with RPAs */ - if (!allow_rpa && - !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && - hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) { - return -1; - } - - /* During suspend, only wakeable devices can be in accept list */ - if (hdev->suspended && - !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) - return 0; - - *num_entries += 1; - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - - bt_dev_dbg(hdev, "Add %pMR (0x%x) to accept list", &cp.bdaddr, - cp.bdaddr_type); - hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp); - - if (use_ll_privacy(hdev)) { - struct smp_irk *irk; - - irk = hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type); - if (irk) { - struct hci_cp_le_add_to_resolv_list cp; - - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - memcpy(cp.peer_irk, irk->val, 16); - - if (hci_dev_test_flag(hdev, HCI_PRIVACY)) - memcpy(cp.local_irk, hdev->irk, 16); - else - memset(cp.local_irk, 0, 16); - - hci_req_add(req, HCI_OP_LE_ADD_TO_RESOLV_LIST, - sizeof(cp), &cp); - } - } - - return 0; -} - -static u8 update_accept_list(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_conn_params *params; - struct bdaddr_list *b; - u8 num_entries = 0; - bool pend_conn, pend_report; - /* We allow usage of accept list even with RPAs in suspend. In the worst - * case, we won't be able to wake from devices that use the privacy1.2 - * features. Additionally, once we support privacy1.2 and IRK - * offloading, we can update this to also check for those conditions. - */ - bool allow_rpa = hdev->suspended; - - if (use_ll_privacy(hdev)) - allow_rpa = true; - - /* Go through the current accept list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to - * report. If not present in either list, then queue the - * command to remove it from the controller. - */ - list_for_each_entry(b, &hdev->le_accept_list, list) { - pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, - b->bdaddr_type); - pend_report = hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, - b->bdaddr_type); - - /* If the device is not likely to connect or report, - * remove it from the accept list. - */ - if (!pend_conn && !pend_report) { - del_from_accept_list(req, &b->bdaddr, b->bdaddr_type); - continue; - } - - /* Accept list can not be used with RPAs */ - if (!allow_rpa && - !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) && - hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { - return 0x00; - } - - num_entries++; - } - - /* Since all no longer valid accept list entries have been - * removed, walk through the list of pending connections - * and ensure that any new device gets programmed into - * the controller. - * - * If the list of the devices is larger than the list of - * available accept list entries in the controller, then - * just abort and return filer policy value to not use the - * accept list. - */ - list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (add_to_accept_list(req, params, &num_entries, allow_rpa)) - return 0x00; - } - - /* After adding all new pending connections, walk through - * the list of pending reports and also add these to the - * accept list if there is still space. Abort if space runs out. - */ - list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (add_to_accept_list(req, params, &num_entries, allow_rpa)) - return 0x00; - } - - /* Use the allowlist unless the following conditions are all true: - * - We are not currently suspending - * - There are 1 or more ADV monitors registered and it's not offloaded - * - Interleaved scanning is not currently using the allowlist - */ - if (!idr_is_empty(&hdev->adv_monitors_idr) && !hdev->suspended && - hci_get_adv_monitor_offload_ext(hdev) == HCI_ADV_MONITOR_EXT_NONE && - hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST) - return 0x00; - - /* Select filter policy to use accept list */ - return 0x01; -} - -static bool scan_use_rpa(struct hci_dev *hdev) -{ - return hci_dev_test_flag(hdev, HCI_PRIVACY); -} - -static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval, - u16 window, u8 own_addr_type, u8 filter_policy, - bool filter_dup, bool addr_resolv) -{ - struct hci_dev *hdev = req->hdev; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return; - } - - if (use_ll_privacy(hdev) && addr_resolv) { - u8 enable = 0x01; - - hci_req_add(req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); - } - - /* Use ext scanning if set ext scan param and ext scan enable is - * supported - */ - if (use_ext_scan(hdev)) { - struct hci_cp_le_set_ext_scan_params *ext_param_cp; - struct hci_cp_le_set_ext_scan_enable ext_enable_cp; - struct hci_cp_le_scan_phy_params *phy_params; - u8 data[sizeof(*ext_param_cp) + sizeof(*phy_params) * 2]; - u32 plen; - - ext_param_cp = (void *)data; - phy_params = (void *)ext_param_cp->data; - - memset(ext_param_cp, 0, sizeof(*ext_param_cp)); - ext_param_cp->own_addr_type = own_addr_type; - ext_param_cp->filter_policy = filter_policy; - - plen = sizeof(*ext_param_cp); - - if (scan_1m(hdev) || scan_2m(hdev)) { - ext_param_cp->scanning_phys |= LE_SCAN_PHY_1M; - - memset(phy_params, 0, sizeof(*phy_params)); - phy_params->type = type; - phy_params->interval = cpu_to_le16(interval); - phy_params->window = cpu_to_le16(window); - - plen += sizeof(*phy_params); - phy_params++; - } - - if (scan_coded(hdev)) { - ext_param_cp->scanning_phys |= LE_SCAN_PHY_CODED; - - memset(phy_params, 0, sizeof(*phy_params)); - phy_params->type = type; - phy_params->interval = cpu_to_le16(interval); - phy_params->window = cpu_to_le16(window); - - plen += sizeof(*phy_params); - phy_params++; - } - - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_PARAMS, - plen, ext_param_cp); - - memset(&ext_enable_cp, 0, sizeof(ext_enable_cp)); - ext_enable_cp.enable = LE_SCAN_ENABLE; - ext_enable_cp.filter_dup = filter_dup; - - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, - sizeof(ext_enable_cp), &ext_enable_cp); - } else { - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = type; - param_cp.interval = cpu_to_le16(interval); - param_cp.window = cpu_to_le16(window); - param_cp.own_address_type = own_addr_type; - param_cp.filter_policy = filter_policy; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = filter_dup; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); - } -} - -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa); -static int hci_update_random_address(struct hci_request *req, - bool require_privacy, bool use_rpa, - u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. - */ - if (use_rpa) { - /* If Controller supports LL Privacy use own address type is - * 0x03 - */ - if (use_ll_privacy(hdev)) - *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; - else - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (rpa_valid(hdev)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - bt_dev_err(hdev, "failed to generate new RPA"); - return err; - } - - set_random_addr(req, &hdev->rpa); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } - - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - * - * In case BR/EDR has been disabled on a dual-mode controller - * and a static address has been configured, then use that - * address instead of the public BR/EDR address. - */ - if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || - !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && - bacmp(&hdev->static_addr, BDADDR_ANY))) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } - - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - -/* Ensure to call hci_req_add_le_scan_disable() first to disable the - * controller based address resolution to be able to reconfigure - * resolving list. - */ -void hci_req_add_le_passive_scan(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - u8 filter_policy; - u16 window, interval; - /* Default is to enable duplicates filter */ - u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - /* Background scanning should run with address resolution */ - bool addr_resolv = true; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return; - } - - /* Set require_privacy to false since no SCAN_REQ are send - * during passive scanning. Not using an non-resolvable address - * here is important so that peer devices using direct - * advertising with our address will be correctly reported - * by the controller. - */ - if (hci_update_random_address(req, false, scan_use_rpa(hdev), - &own_addr_type)) - return; - - if (hdev->enable_advmon_interleave_scan && - __hci_update_interleaved_scan(hdev)) - return; - - bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state); - /* Adding or removing entries from the accept list must - * happen before enabling scanning. The controller does - * not allow accept list modification while scanning. - */ - filter_policy = update_accept_list(req); - - /* When the controller is using random resolvable addresses and - * with that having LE privacy enabled, then controllers with - * Extended Scanner Filter Policies support can now enable support - * for handling directed advertising. - * - * So instead of using filter polices 0x00 (no accept list) - * and 0x01 (accept list enabled) use the new filter policies - * 0x02 (no accept list) and 0x03 (accept list enabled). - */ - if (hci_dev_test_flag(hdev, HCI_PRIVACY) && - (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) - filter_policy |= 0x02; - - if (hdev->suspended) { - window = hdev->le_scan_window_suspend; - interval = hdev->le_scan_int_suspend; - } else if (hci_is_le_conn_scanning(hdev)) { - window = hdev->le_scan_window_connect; - interval = hdev->le_scan_int_connect; - } else if (hci_is_adv_monitoring(hdev)) { - window = hdev->le_scan_window_adv_monitor; - interval = hdev->le_scan_int_adv_monitor; - - /* Disable duplicates filter when scanning for advertisement - * monitor for the following reasons. - * - * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm - * controllers ignore RSSI_Sampling_Period when the duplicates - * filter is enabled. - * - * For SW pattern filtering, when we're not doing interleaved - * scanning, it is necessary to disable duplicates filter, - * otherwise hosts can only receive one advertisement and it's - * impossible to know if a peer is still in range. - */ - filter_dup = LE_SCAN_FILTER_DUP_DISABLE; - } else { - window = hdev->le_scan_window; - interval = hdev->le_scan_interval; - } - - bt_dev_dbg(hdev, "LE passive scan with accept list = %d", - filter_policy); - hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window, - own_addr_type, filter_policy, filter_dup, - addr_resolv); -} - -static int hci_req_add_le_interleaved_scan(struct hci_request *req, - unsigned long opt) -{ - struct hci_dev *hdev = req->hdev; - int ret = 0; - - hci_dev_lock(hdev); - - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req, false); - hci_req_add_le_passive_scan(req); - - switch (hdev->interleave_scan_state) { - case INTERLEAVE_SCAN_ALLOWLIST: - bt_dev_dbg(hdev, "next state: allowlist"); - hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; - break; - case INTERLEAVE_SCAN_NO_FILTER: - bt_dev_dbg(hdev, "next state: no filter"); - hdev->interleave_scan_state = INTERLEAVE_SCAN_ALLOWLIST; - break; - case INTERLEAVE_SCAN_NONE: - BT_ERR("unexpected error"); - ret = -1; - } - - hci_dev_unlock(hdev); - - return ret; -} - -static void interleave_scan_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - interleave_scan.work); - u8 status; - unsigned long timeout; - - if (hdev->interleave_scan_state == INTERLEAVE_SCAN_ALLOWLIST) { - timeout = msecs_to_jiffies(hdev->advmon_allowlist_duration); - } else if (hdev->interleave_scan_state == INTERLEAVE_SCAN_NO_FILTER) { - timeout = msecs_to_jiffies(hdev->advmon_no_filter_duration); - } else { - bt_dev_err(hdev, "unexpected error"); - return; - } - - hci_req_sync(hdev, hci_req_add_le_interleaved_scan, 0, - HCI_CMD_TIMEOUT, &status); - - /* Don't continue interleaving if it was canceled */ - if (is_interleave_scanning(hdev)) - queue_delayed_work(hdev->req_workqueue, - &hdev->interleave_scan, timeout); -} - -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) -{ - struct hci_dev *hdev = req->hdev; - - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. - */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV) || - hci_lookup_le_connect(hdev)) { - bt_dev_dbg(hdev, "Deferring random address update"); - hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); - return; - } - - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - -void hci_request_setup(struct hci_dev *hdev) -{ - INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); -} - -void hci_request_cancel_all(struct hci_dev *hdev) -{ - hci_cmd_sync_cancel_sync(hdev, ENODEV); - - cancel_interleave_scan(hdev); -} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h deleted file mode 100644 index b730da4a8b47..000000000000 --- a/net/bluetooth/hci_request.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - BlueZ - Bluetooth protocol stack for Linux - Copyright (C) 2014 Intel Corporation - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License version 2 as - published by the Free Software Foundation; - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. - IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS - SOFTWARE IS DISCLAIMED. -*/ - -#include - -void hci_req_init(struct hci_request *req, struct hci_dev *hdev); -void hci_req_purge(struct hci_request *req); -bool hci_req_status_pend(struct hci_dev *hdev); -int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); -void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, - struct sk_buff *skb); -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param); -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event); -void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, - hci_req_complete_t *req_complete, - hci_req_complete_skb_t *req_complete_skb); - -int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status); -int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, - unsigned long opt), - unsigned long opt, u32 timeout, u8 *hci_status); - -struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, - const void *param); - -void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn); -void hci_req_add_le_passive_scan(struct hci_request *req); - -void hci_request_setup(struct hci_dev *hdev); -void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3ab1558ff391..40d4887c7f79 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -33,7 +33,6 @@ #include #include -#include "hci_request.h" #include "smp.h" #include "mgmt_util.h" #include "mgmt_config.h" diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c index d039683d3bdd..5a8ccc491b14 100644 --- a/net/bluetooth/msft.c +++ b/net/bluetooth/msft.c @@ -7,7 +7,6 @@ #include #include -#include "hci_request.h" #include "mgmt_util.h" #include "msft.h" -- cgit v1.2.3 From e50bfd6bb231a6e2b7221ad78dce294330238c76 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:33 +0200 Subject: net_tstamp: Add TIMESTAMPING SOFTWARE and HARDWARE mask Timestamping software or hardware flags are often used as a group, therefore adding these masks will ease future use. I did not use SOF_TIMESTAMPING_SYS_HARDWARE flag as it is deprecated and not used at all. Reviewed-by: Willem de Bruijn Reviewed-by: Florian Fainelli Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-1-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/net_tstamp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index eb01c37e71e0..3799c79b6c83 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -5,6 +5,14 @@ #include +#define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ + SOF_TIMESTAMPING_TX_SOFTWARE | \ + SOF_TIMESTAMPING_SOFTWARE) + +#define SOF_TIMESTAMPING_HARDWARE_MASK (SOF_TIMESTAMPING_RX_HARDWARE | \ + SOF_TIMESTAMPING_TX_HARDWARE | \ + SOF_TIMESTAMPING_RAW_HARDWARE) + enum hwtstamp_source { HWTSTAMP_SOURCE_NETDEV, HWTSTAMP_SOURCE_PHYLIB, -- cgit v1.2.3 From 2dd35600590148d843367c04975acad3c1a527c3 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:36 +0200 Subject: net: Change the API of PHY default timestamp to MAC Change the API to select MAC default time stamping instead of the PHY. Indeed the PHY is closer to the wire therefore theoretically it has less delay than the MAC timestamping but the reality is different. Due to lower time stamping clock frequency, latency in the MDIO bus and no PHC hardware synchronization between different PHY, the PHY PTP is often less precise than the MAC. The exception is for PHY designed specially for PTP case but these devices are not very widespread. For not breaking the compatibility default_timestamp flag has been introduced in phy_device that is set by the phy driver to know we are using the old API behavior. Reviewed-by: Rahul Rameshbabu Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-4-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm-phy-ptp.c | 3 +++ drivers/net/phy/dp83640.c | 2 ++ drivers/net/phy/micrel.c | 6 ++++++ drivers/net/phy/mscc/mscc_ptp.c | 3 +++ drivers/net/phy/nxp-c45-tja11xx.c | 3 +++ include/linux/phy.h | 19 +++++++++++++++++++ net/core/dev_ioctl.c | 8 +++----- net/core/timestamping.c | 5 +++-- net/ethtool/common.c | 2 +- 9 files changed, 43 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index 617d384d4551..d3e825c951ee 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -931,6 +931,9 @@ struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev) return ERR_CAST(clock); priv->ptp_clock = clock; + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; + priv->phydev = phydev; bcm_ptp_init(priv); diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 5c42c47dc564..d3e72d5c1472 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1447,6 +1447,8 @@ static int dp83640_probe(struct phy_device *phydev) for (i = 0; i < MAX_RXTS; i++) list_add(&dp83640->rx_pool_data[i].list, &dp83640->rxpool); + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; phydev->mii_ts = &dp83640->mii_ts; phydev->priv = dp83640; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index ebafedde0ab7..8d57225d8575 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -3781,6 +3781,9 @@ static void lan8814_ptp_init(struct phy_device *phydev) ptp_priv->mii_ts.ts_info = lan8814_ts_info; phydev->mii_ts = &ptp_priv->mii_ts; + + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; } static int lan8814_ptp_probe_once(struct phy_device *phydev) @@ -5279,6 +5282,9 @@ static int lan8841_probe(struct phy_device *phydev) phydev->mii_ts = &ptp_priv->mii_ts; + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; + return 0; } diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index eb0b032cb613..e66d20eff7c4 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1570,6 +1570,9 @@ int vsc8584_ptp_probe(struct phy_device *phydev) return PTR_ERR(vsc8531->load_save); } + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; + vsc8531->ptp->phydev = phydev; return 0; diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 3cf614b4cd52..d18c133e6013 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1660,6 +1660,9 @@ static int nxp_c45_probe(struct phy_device *phydev) priv->mii_ts.ts_info = nxp_c45_ts_info; phydev->mii_ts = &priv->mii_ts; ret = nxp_c45_init_ptp_clock(priv); + + /* Timestamp selected by default to keep legacy API */ + phydev->default_timestamp = true; } else { phydev_dbg(phydev, "PTP support not enabled even if the phy supports it"); } diff --git a/include/linux/phy.h b/include/linux/phy.h index bd68f9d8e74f..e7a38137211c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -616,6 +616,8 @@ struct macsec_ops; * handling shall be postponed until PHY has resumed * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, * requiring a rerun of the interrupt handler after resume + * @default_timestamp: Flag indicating whether we are using the phy + * timestamp as the default one * @interface: enum phy_interface_t value * @possible_interfaces: bitmap if interface modes that the attached PHY * will switch between depending on media speed. @@ -681,6 +683,8 @@ struct phy_device { unsigned irq_suspended:1; unsigned irq_rerun:1; + unsigned default_timestamp:1; + int rate_matching; enum phy_state state; @@ -1625,6 +1629,21 @@ static inline void phy_txtstamp(struct phy_device *phydev, struct sk_buff *skb, phydev->mii_ts->txtstamp(phydev->mii_ts, skb, type); } +/** + * phy_is_default_hwtstamp - Is the PHY hwtstamp the default timestamp + * @phydev: Pointer to phy_device + * + * This is used to get default timestamping device taking into account + * the new API choice, which is selecting the timestamping from MAC by + * default if the phydev does not have default_timestamp flag enabled. + * + * Return: True if phy is the default hw timestamp, false otherwise. + */ +static inline bool phy_is_default_hwtstamp(struct phy_device *phydev) +{ + return phy_has_hwtstamp(phydev) && phydev->default_timestamp; +} + /** * phy_is_internal - Convenience function for testing if a PHY is internal * @phydev: the phy_device struct diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b9719ed3c3fd..8592c052c0f4 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -259,9 +259,7 @@ static int dev_eth_ioctl(struct net_device *dev, * @dev: Network device * @cfg: Timestamping configuration structure * - * Helper for enforcing a common policy that phylib timestamping, if available, - * should take precedence in front of hardware timestamping provided by the - * netdev. + * Helper for calling the default hardware provider timestamping. * * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and * there only exists a phydev->mii_ts->hwtstamp() method. So this will return @@ -271,7 +269,7 @@ static int dev_eth_ioctl(struct net_device *dev, static int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { - if (phy_has_hwtstamp(dev->phydev)) + if (phy_is_default_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); @@ -327,7 +325,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; - bool phy_ts = phy_has_hwtstamp(dev->phydev); + bool phy_ts = phy_is_default_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; bool changed = false; int err; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 04840697fe79..3717fb152ecc 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -25,7 +25,8 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) struct sk_buff *clone; unsigned int type; - if (!skb->sk) + if (!skb->sk || !skb->dev || + !phy_is_default_hwtstamp(skb->dev->phydev)) return; type = classify(skb); @@ -47,7 +48,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) struct mii_timestamper *mii_ts; unsigned int type; - if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts) + if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev)) return false; if (skb_headroom(skb) < ETH_HLEN) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 6b2a360dcdf0..01b7550f12c6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -637,7 +637,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; - if (phy_has_tsinfo(phydev)) + if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) return phy_ts_info(phydev, info); if (ops->get_ts_info) return ops->get_ts_info(dev, info); -- cgit v1.2.3 From bc5a07ed15a3d30df3e8af2ef6bb0472f1d337f9 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:37 +0200 Subject: net: net_tstamp: Add unspec field to hwtstamp_source enumeration Prepare for future support of saving hwtstamp source in PTP xarray by introducing HWTSTAMP_SOURCE_UNSPEC to hwtstamp_source enum, setting it to 0 to match old behavior of no source defined. Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-5-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/net_tstamp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 3799c79b6c83..662074b08c94 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -14,6 +14,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE) enum hwtstamp_source { + HWTSTAMP_SOURCE_UNSPEC, HWTSTAMP_SOURCE_NETDEV, HWTSTAMP_SOURCE_PHYLIB, }; -- cgit v1.2.3 From 2111375b85ad173d58e7b8604246a3de60950ac8 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:38 +0200 Subject: net: Add struct kernel_ethtool_ts_info In prevision to add new UAPI for hwtstamp we will be limited to the struct ethtool_ts_info that is currently passed in fixed binary format through the ETHTOOL_GET_TS_INFO ethtool ioctl. It would be good if new kernel code already started operating on an extensible kernel variant of that structure, similar in concept to struct kernel_hwtstamp_config vs struct hwtstamp_config. Since struct ethtool_ts_info is in include/uapi/linux/ethtool.h, here we introduce the kernel-only structure in include/linux/ethtool.h. The manual copy is then made in the function called by ETHTOOL_GET_TS_INFO. Acked-by: Shannon Nelson Acked-by: Alexandra Winter Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-6-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 4 ++-- drivers/net/can/dev/dev.c | 2 +- drivers/net/can/peak_canfd/peak_canfd.c | 2 +- drivers/net/can/usb/gs_usb.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_core.h | 2 +- drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c | 2 +- drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h | 2 +- drivers/net/dsa/microchip/ksz_ptp.c | 2 +- drivers/net/dsa/microchip/ksz_ptp.h | 2 +- drivers/net/dsa/mv88e6xxx/hwtstamp.c | 2 +- drivers/net/dsa/mv88e6xxx/hwtstamp.h | 4 ++-- drivers/net/dsa/ocelot/felix.c | 2 +- drivers/net/dsa/sja1105/sja1105_ptp.c | 2 +- drivers/net/dsa/sja1105/sja1105_ptp.h | 2 +- drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 2 +- .../net/ethernet/aquantia/atlantic/aq_ethtool.c | 2 +- .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 2 +- drivers/net/ethernet/cadence/macb.h | 2 +- drivers/net/ethernet/cadence/macb_main.c | 4 ++-- drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 2 +- .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 2 +- drivers/net/ethernet/cisco/enic/enic_ethtool.c | 2 +- drivers/net/ethernet/engleder/tsnep_ethtool.c | 2 +- drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 2 +- .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c | 2 +- .../net/ethernet/freescale/enetc/enetc_ethtool.c | 2 +- drivers/net/ethernet/freescale/fec_main.c | 2 +- drivers/net/ethernet/freescale/gianfar_ethtool.c | 2 +- .../net/ethernet/fungible/funeth/funeth_ethtool.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h | 2 +- drivers/net/ethernet/intel/e1000e/ethtool.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/ice/ice_ethtool.c | 2 +- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 ++-- .../ethernet/mellanox/mlx5/core/ipoib/ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h | 10 ++++----- drivers/net/ethernet/microchip/lan743x_ethtool.c | 2 +- .../ethernet/microchip/lan966x/lan966x_ethtool.c | 2 +- .../net/ethernet/microchip/sparx5/sparx5_ethtool.c | 2 +- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- drivers/net/ethernet/mscc/ocelot_ptp.c | 2 +- .../net/ethernet/pensando/ionic/ionic_ethtool.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ptp.h | 2 +- drivers/net/ethernet/renesas/ravb_main.c | 2 +- drivers/net/ethernet/renesas/rswitch.c | 2 +- drivers/net/ethernet/renesas/rtsn.c | 2 +- drivers/net/ethernet/sfc/ethtool.c | 2 +- drivers/net/ethernet/sfc/falcon/nic.h | 2 +- drivers/net/ethernet/sfc/ptp.c | 2 +- drivers/net/ethernet/sfc/ptp.h | 5 +++-- drivers/net/ethernet/sfc/siena/ethtool.c | 2 +- drivers/net/ethernet/sfc/siena/ptp.c | 2 +- drivers/net/ethernet/sfc/siena/ptp.h | 4 ++-- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 2 +- drivers/net/ethernet/ti/am65-cpsw-ethtool.c | 2 +- drivers/net/ethernet/ti/cpsw_ethtool.c | 4 ++-- drivers/net/ethernet/ti/cpsw_priv.h | 2 +- drivers/net/ethernet/ti/icssg/icssg_ethtool.c | 2 +- drivers/net/ethernet/ti/netcp_ethss.c | 4 ++-- drivers/net/ethernet/xscale/ixp4xx_eth.c | 2 +- drivers/net/macvlan.c | 2 +- drivers/net/netdevsim/ethtool.c | 2 +- drivers/net/phy/bcm-phy-ptp.c | 2 +- drivers/net/phy/dp83640.c | 2 +- drivers/net/phy/micrel.c | 4 ++-- drivers/net/phy/mscc/mscc_ptp.c | 2 +- drivers/net/phy/nxp-c45-tja11xx.c | 2 +- drivers/ptp/ptp_ines.c | 2 +- drivers/s390/net/qeth_ethtool.c | 2 +- include/linux/can/dev.h | 2 +- include/linux/ethtool.h | 25 +++++++++++++++++++--- include/linux/mii_timestamper.h | 2 +- include/linux/phy.h | 2 +- include/net/dsa.h | 2 +- include/soc/mscc/ocelot.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/dsa/user.c | 2 +- net/ethtool/common.c | 6 +++--- net/ethtool/common.h | 2 +- net/ethtool/ioctl.c | 14 +++++++++--- net/ethtool/tsinfo.c | 6 +++--- net/sched/sch_taprio.c | 2 +- 101 files changed, 151 insertions(+), 123 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d19aabf5d4fb..af9ddd3902cc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5755,10 +5755,10 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, } static int bond_ethtool_get_ts_info(struct net_device *bond_dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct bonding *bond = netdev_priv(bond_dev); - struct ethtool_ts_info ts_info; + struct kernel_ethtool_ts_info ts_info; struct net_device *real_dev; bool sw_tx_support = false; struct list_head *iter; diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 83e724e0ab87..87828f953073 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -376,7 +376,7 @@ EXPORT_SYMBOL(can_eth_ioctl_hwts); * supporting hardware timestamps */ int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 31c9c127e24b..b50005397463 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -777,7 +777,7 @@ static const struct net_device_ops peak_canfd_netdev_ops = { }; static int peak_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 340297e3bec7..bc86e9b329fd 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1148,7 +1148,7 @@ static int gs_usb_set_phys_id(struct net_device *netdev, } static int gs_usb_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct gs_can *dev = netdev_priv(netdev); diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 1efa39e134f4..3d68fef46ded 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -897,7 +897,7 @@ int peak_usb_set_eeprom(struct net_device *netdev, return 0; } -int pcan_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int pcan_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h index f6cf84bb718f..abab00930b9d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h @@ -145,7 +145,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *tv); int peak_usb_netif_rx_64(struct sk_buff *skb, u32 ts_low, u32 ts_high); void peak_usb_async_complete(struct urb *urb); void peak_usb_restart_complete(struct peak_usb_device *dev); -int pcan_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); +int pcan_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); /* common 32-bit CAN channel ID ethtool management */ int peak_usb_get_eeprom_len(struct net_device *netdev); diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c index bd7aacc71a63..ca2500aba96f 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.c @@ -16,7 +16,7 @@ #include "hellcreek_ptp.h" int hellcreek_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct hellcreek *hellcreek = ds->priv; diff --git a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h index 71af77efb28b..7d88da2134f2 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h +++ b/drivers/net/dsa/hirschmann/hellcreek_hwtstamp.h @@ -48,7 +48,7 @@ void hellcreek_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); int hellcreek_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); long hellcreek_hwtstamp_work(struct ptp_clock_info *ptp); diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index 1fe105913c75..f0bd46e5d4ec 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -293,7 +293,7 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) /* The function is return back the capability of timestamping feature when * requested through ethtool -T utility */ -int ksz_get_ts_info(struct dsa_switch *ds, int port, struct ethtool_ts_info *ts) +int ksz_get_ts_info(struct dsa_switch *ds, int port, struct kernel_ethtool_ts_info *ts) { struct ksz_device *dev = ds->priv; struct ksz_ptp_data *ptp_data; diff --git a/drivers/net/dsa/microchip/ksz_ptp.h b/drivers/net/dsa/microchip/ksz_ptp.h index 0ca8ca4f804e..2f1783c0d723 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.h +++ b/drivers/net/dsa/microchip/ksz_ptp.h @@ -38,7 +38,7 @@ int ksz_ptp_clock_register(struct dsa_switch *ds); void ksz_ptp_clock_unregister(struct dsa_switch *ds); int ksz_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *ts); + struct kernel_ethtool_ts_info *ts); int ksz_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr); int ksz_hwtstamp_set(struct dsa_switch *ds, int port, struct ifreq *ifr); void ksz_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c index 331b4ca089ff..49e6e1355142 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.c +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -64,7 +64,7 @@ static int mv88e6xxx_ptp_read(struct mv88e6xxx_chip *chip, int addr, #define TX_TSTAMP_TIMEOUT msecs_to_jiffies(40) int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { const struct mv88e6xxx_ptp_ops *ptp_ops; struct mv88e6xxx_chip *chip; diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h index cf7fb6d660b1..85acc758e3eb 100644 --- a/drivers/net/dsa/mv88e6xxx/hwtstamp.h +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h @@ -121,7 +121,7 @@ void mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb); int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip); void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip); @@ -157,7 +157,7 @@ static inline void mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port, } static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { return -EOPNOTSUPP; } diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index d12c4e85baa7..e554699f06d4 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1234,7 +1234,7 @@ static int felix_get_sset_count(struct dsa_switch *ds, int port, int sset) } static int felix_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ocelot *ocelot = ds->priv; diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index a7d41e781398..a1f4ca6ad888 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -111,7 +111,7 @@ int sja1105_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) } int sja1105_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct sja1105_private *priv = ds->priv; struct sja1105_ptp_data *ptp_data = &priv->ptp_data; diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h index 416461ee95d2..8add2bd5f728 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.h +++ b/drivers/net/dsa/sja1105/sja1105_ptp.h @@ -101,7 +101,7 @@ void sja1105pqrs_ptp_cmd_packing(u8 *buf, struct sja1105_ptp_cmd *cmd, enum packing_op op); int sja1105_get_ts_info(struct dsa_switch *ds, int port, - struct ethtool_ts_info *ts); + struct kernel_ethtool_ts_info *ts); void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot, struct sk_buff *clone); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 58e7e88aae5b..21407a26f806 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -577,7 +577,7 @@ static int xgbe_set_rxfh(struct net_device *netdev, } static int xgbe_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct xgbe_prv_data *pdata = netdev_priv(netdev); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a2606ee3b0a5..d0aecd1d7357 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -652,7 +652,7 @@ static int aq_ethtool_set_wol(struct net_device *ndev, } static int aq_ethtool_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct aq_nic_s *aq_nic = netdev_priv(ndev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 58956ed8f531..c7b56a5e5425 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -3634,7 +3634,7 @@ static int bnx2x_set_channels(struct net_device *dev, } static int bnx2x_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct bnx2x *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index bf157f6cc042..fcbf38f79c23 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -5013,7 +5013,7 @@ static int bnxt_get_dump_data(struct net_device *dev, struct ethtool_dump *dump, } static int bnxt_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct bnxt *bp = netdev_priv(dev); struct bnxt_ptp_cfg *ptp; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1589a49b876c..0ec5f01551f9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6141,7 +6141,7 @@ static void tg3_refclk_write(struct tg3 *tp, u64 newval) static inline void tg3_full_lock(struct tg3 *tp, int irq_sync); static inline void tg3_full_unlock(struct tg3 *tp); -static int tg3_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +static int tg3_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct tg3 *tp = netdev_priv(dev); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 122663ff7834..ea71612f6b36 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1168,7 +1168,7 @@ struct macb_ptp_info { s32 (*get_ptp_max_adj)(void); unsigned int (*get_tsu_rate)(struct macb *bp); int (*get_ts_info)(struct net_device *dev, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int (*get_hwtst)(struct net_device *netdev, struct kernel_hwtstamp_config *tstamp_config); int (*set_hwtst)(struct net_device *netdev, diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index cecc3d6e630f..11665be3a22c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3399,7 +3399,7 @@ static s32 gem_get_ptp_max_adj(void) } static int gem_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct macb *bp = netdev_priv(dev); @@ -3440,7 +3440,7 @@ static struct macb_ptp_info gem_ptp_info = { #endif static int macb_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct macb *bp = netdev_priv(netdev); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index d3e07b6ed5e1..5835965dbc32 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -2497,7 +2497,7 @@ ret_intrmod: } static int lio_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct lio *lio = GET_LIO(netdev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 34125b8cd935..6a04d2530176 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -836,7 +836,7 @@ static int nicvf_set_pauseparam(struct net_device *dev, } static int nicvf_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct nicvf *nic = netdev_priv(netdev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 47eecde36285..3d091947ae00 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -1550,7 +1550,7 @@ out_free_fw: return ret; } -static int get_ts_info(struct net_device *dev, struct ethtool_ts_info *ts_info) +static int get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *ts_info) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index a42f3f280f3e..f2f1055880b2 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -599,7 +599,7 @@ static int enic_set_rxfh(struct net_device *netdev, } static int enic_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/drivers/net/ethernet/engleder/tsnep_ethtool.c b/drivers/net/ethernet/engleder/tsnep_ethtool.c index 65ec1abc9442..9aa286ba1f00 100644 --- a/drivers/net/ethernet/engleder/tsnep_ethtool.c +++ b/drivers/net/ethernet/engleder/tsnep_ethtool.c @@ -305,7 +305,7 @@ static void tsnep_ethtool_get_channels(struct net_device *netdev, } static int tsnep_ethtool_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct tsnep_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 5bd0b36d1feb..0e923d805732 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -394,7 +394,7 @@ static int dpaa_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) } static int dpaa_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct device *dev = net_dev->dev.parent; struct device_node *mac_node = dev->of_node; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c index e80e9388c71f..7f476519b7ad 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c @@ -794,7 +794,7 @@ int dpaa2_phc_index = -1; EXPORT_SYMBOL(dpaa2_phc_index); static int dpaa2_eth_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { if (!dpaa2_ptp) return ethtool_op_get_ts_info(dev, info); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index f7753ea5b57e..5e684b23c5f5 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -841,7 +841,7 @@ static int enetc_set_coalesce(struct net_device *ndev, } static int enetc_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { int *phc_idx; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index fb19295529a2..a923cb95cdc6 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2768,7 +2768,7 @@ static void fec_enet_get_regs(struct net_device *ndev, } static int fec_enet_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct fec_enet_private *fep = netdev_priv(ndev); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 7a15b9245698..f581402ad740 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1448,7 +1448,7 @@ static int gfar_get_nfc(struct net_device *dev, struct ethtool_rxnfc *cmd, } static int gfar_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct gfar_private *priv = netdev_priv(dev); struct platform_device *ptp_dev; diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c index 4edd0adfc6c7..7f081e6e8c87 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -1040,7 +1040,7 @@ static int fun_set_rxfh(struct net_device *netdev, } static int fun_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 7cebb08bd320..27dbe367f3d3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -786,7 +786,7 @@ struct hnae3_ae_ops { void (*get_rx_hwts)(struct hnae3_handle *handle, struct sk_buff *skb, u32 nsec, u32 sec); int (*get_ts_info)(struct hnae3_handle *handle, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int (*get_link_diagnosis_info)(struct hnae3_handle *handle, u32 *status_code); void (*clean_vf_config)(struct hnae3_ae_dev *ae_dev, int num_vfs); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 941cb529d671..b1e988347347 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -2009,7 +2009,7 @@ static int hns3_set_tunable(struct net_device *netdev, ETHTOOL_RING_USE_TX_PUSH) static int hns3_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct hnae3_handle *handle = hns3_get_handle(netdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 507d7ce26d83..5fff8ed388f8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -378,7 +378,7 @@ int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr) } int hclge_ptp_get_ts_info(struct hnae3_handle *handle, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h index bbee74cd8404..63483636c074 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h @@ -138,6 +138,6 @@ int hclge_ptp_set_cfg(struct hclge_dev *hdev, struct ifreq *ifr); int hclge_ptp_init(struct hclge_dev *hdev); void hclge_ptp_uninit(struct hclge_dev *hdev); int hclge_ptp_get_ts_info(struct hnae3_handle *handle, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int hclge_ptp_cfg_qry(struct hclge_dev *hdev, u32 *cfg); #endif diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 85da20778e0f..9364bc2b4eb1 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -2263,7 +2263,7 @@ static int e1000e_set_eee(struct net_device *netdev, struct ethtool_keee *edata) } static int e1000e_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct e1000_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 4e28785c9fb2..1d0d2e526adb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2546,7 +2546,7 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } static int i40e_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct i40e_pf *pf = i40e_netdev_to_pf(dev); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 62c8205fceba..b8f142f04330 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3434,7 +3434,7 @@ ice_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, } static int -ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +ice_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { struct ice_pf *pf = ice_netdev_to_pf(dev); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 61d72250c0ed..06b9970dffad 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2381,7 +2381,7 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } static int igb_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct igb_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 0cd2bd695db1..9c3adb18f922 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1559,7 +1559,7 @@ static int igc_ethtool_set_channels(struct net_device *netdev, } static int igc_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct igc_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 6e6e6f1847b6..4cac76254966 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3170,7 +3170,7 @@ static int ixgbe_set_rxfh(struct net_device *netdev, } static int ixgbe_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ixgbe_adapter *adapter = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 9adf4301c9b1..8c45ad983abc 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5259,7 +5259,7 @@ static int mvpp2_get_ts_config(struct mvpp2_port *port, struct ifreq *ifr) } static int mvpp2_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mvpp2_port *port = netdev_priv(dev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 7f786de61014..0db62eb0dab3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -954,7 +954,7 @@ static u32 otx2_get_link(struct net_device *netdev) } static int otx2_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct otx2_nic *pfvf = netdev_priv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 0606f18e5bbe..943d6918c2ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1884,7 +1884,7 @@ out: } static int mlx4_en_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6a343a8f162f..5fd82c67b6ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1191,7 +1191,7 @@ int mlx5e_set_per_queue_coalesce(struct net_device *dev, u32 queue, u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3320f12ba2db..cbb3945529d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1658,7 +1658,7 @@ static int mlx5e_set_pauseparam(struct net_device *netdev, } int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1682,7 +1682,7 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, } static int mlx5e_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx5e_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 779d92b762d3..905bdbaffb9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -136,7 +136,7 @@ static int mlx5i_get_coalesce(struct net_device *netdev, } static int mlx5i_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bb0586b45c8d..8d3c61287696 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -238,7 +238,7 @@ struct mlxsw_sp_ptp_ops { struct hwtstamp_config *config); void (*shaper_work)(struct work_struct *work); int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int (*get_stats_count)(void); void (*get_stats_strings)(u8 **p); void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index c79da1411d33..2bed8c86b7cf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1082,7 +1082,7 @@ mlxsw_sp_set_module_eeprom_by_page(struct net_device *dev, } static int -mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) +mlxsw_sp_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *info) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index cbb6c75a6620..5b174cb95eb8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -1276,7 +1276,7 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, } int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); @@ -1661,7 +1661,7 @@ err_get_message_types: } int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index a8b88230959a..769095d4932d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -11,7 +11,7 @@ struct mlxsw_sp; struct mlxsw_sp_port; struct mlxsw_sp_ptp_clock; -static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info) +static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; @@ -50,7 +50,7 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mlxsw_sp1_get_stats_count(void); void mlxsw_sp1_get_stats_strings(u8 **p); @@ -84,7 +84,7 @@ int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, struct hwtstamp_config *config); int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, struct mlxsw_sp_port *mlxsw_sp_port, @@ -152,7 +152,7 @@ static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) } static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { return mlxsw_sp_ptp_get_ts_info_noptp(info); } @@ -227,7 +227,7 @@ mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, } static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { return mlxsw_sp_ptp_get_ts_info_noptp(info); } diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c index 0d1740d64676..3a63ec091413 100644 --- a/drivers/net/ethernet/microchip/lan743x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c @@ -1029,7 +1029,7 @@ static int lan743x_ethtool_set_rxfh(struct net_device *netdev, } static int lan743x_ethtool_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct lan743x_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index c0fc85ac5db3..aec7066d83b3 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -538,7 +538,7 @@ static int lan966x_set_pauseparam(struct net_device *dev, } static int lan966x_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct lan966x_port *port = netdev_priv(dev); struct lan966x *lan966x = port->lan966x; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index a06dc5a9b355..4f800c1a435d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1183,7 +1183,7 @@ static void sparx5_config_port_stats(struct sparx5 *sparx5, int portno) } static int sparx5_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct sparx5_port *port = netdev_priv(dev); struct sparx5 *sparx5 = port->sparx5; diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 21a87a3fc556..7c9540a71725 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -980,7 +980,7 @@ static int ocelot_port_get_sset_count(struct net_device *dev, int sset) } static int ocelot_port_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot *ocelot = priv->port.ocelot; diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index cb32234a5bf1..b3c28260adf8 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -580,7 +580,7 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) EXPORT_SYMBOL(ocelot_hwstamp_set); int ocelot_get_ts_info(struct ocelot *ocelot, int port, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->phc_index = ocelot->ptp_clock ? ptp_clock_index(ocelot->ptp_clock) : -1; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 185a03514ae3..4619fd74f3e3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -977,7 +977,7 @@ static int ionic_get_module_eeprom(struct net_device *netdev, } static int ionic_get_ts_info(struct net_device *netdev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ionic_lif *lif = netdev_priv(netdev); struct ionic *ionic = lif->ionic; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index f497f6ca1018..97b059be1041 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1137,7 +1137,7 @@ static int qede_set_channels(struct net_device *dev, } static int qede_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct qede_dev *edev = netdev_priv(dev); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 747cc5e2bb78..63e3dac4d5f7 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -321,7 +321,7 @@ int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *ifr) sizeof(config)) ? -EFAULT : 0; } -int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info) +int qede_ptp_get_ts_info(struct qede_dev *edev, struct kernel_ethtool_ts_info *info) { struct qede_ptp *ptp = edev->ptp; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.h b/drivers/net/ethernet/qlogic/qede/qede_ptp.h index 1db0f021c645..adafc894797e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.h +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.h @@ -17,7 +17,7 @@ void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb); int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *req); void qede_ptp_disable(struct qede_dev *edev); int qede_ptp_enable(struct qede_dev *edev); -int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *ts); +int qede_ptp_get_ts_info(struct qede_dev *edev, struct kernel_ethtool_ts_info *ts); static inline void qede_ptp_record_rx_ts(struct qede_dev *edev, union eth_rx_cqe *cqe, diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 6605e4f4af53..c02fb296bf7d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1737,7 +1737,7 @@ static int ravb_set_ringparam(struct net_device *ndev, } static int ravb_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct ravb_private *priv = netdev_priv(ndev); const struct ravb_hw_info *hw_info = priv->info; diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 24c90d8f5a44..ff50e20856ec 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1809,7 +1809,7 @@ static const struct net_device_ops rswitch_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static int rswitch_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) +static int rswitch_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { struct rswitch_device *rdev = netdev_priv(ndev); diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index ad69d47463cb..577227c007ab 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1213,7 +1213,7 @@ static const struct net_device_ops rtsn_netdev_ops = { }; static int rtsn_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct rtsn_private *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 0f5c68b8bab7..7c887160e2ef 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -226,7 +226,7 @@ static void efx_ethtool_get_fec_stats(struct net_device *net_dev, } static int efx_ethtool_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct efx_nic *efx = efx_netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h index 9f413474bd9f..ada6e036fd97 100644 --- a/drivers/net/ethernet/sfc/falcon/nic.h +++ b/drivers/net/ethernet/sfc/falcon/nic.h @@ -297,7 +297,7 @@ static inline struct falcon_board *falcon_board(struct ef4_nic *efx) return &data->board; } -struct ethtool_ts_info; +struct kernel_ethtool_ts_info; extern const struct ef4_nic_type falcon_a1_nic_type; extern const struct ef4_nic_type falcon_b0_nic_type; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index c3bffbf0ba2b..6fd2fdbaa418 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1864,7 +1864,7 @@ static int efx_ptp_ts_init(struct efx_nic *efx, struct kernel_hwtstamp_config *i return 0; } -void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info) +void efx_ptp_get_ts_info(struct efx_nic *efx, struct kernel_ethtool_ts_info *ts_info) { struct efx_ptp_data *ptp = efx->ptp_data; struct efx_nic *primary = efx->primary; diff --git a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h index 2f30dbb490d2..6946203499ef 100644 --- a/drivers/net/ethernet/sfc/ptp.h +++ b/drivers/net/ethernet/sfc/ptp.h @@ -12,7 +12,7 @@ #include #include "net_driver.h" -struct ethtool_ts_info; +struct kernel_ethtool_ts_info; int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); struct efx_channel *efx_ptp_channel(struct efx_nic *efx); @@ -23,7 +23,8 @@ int efx_ptp_set_ts_config(struct efx_nic *efx, struct netlink_ext_ack *extack); int efx_ptp_get_ts_config(struct efx_nic *efx, struct kernel_hwtstamp_config *config); -void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info); +void efx_ptp_get_ts_info(struct efx_nic *efx, + struct kernel_ethtool_ts_info *ts_info); bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); int efx_ptp_get_mode(struct efx_nic *efx); int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, diff --git a/drivers/net/ethernet/sfc/siena/ethtool.c b/drivers/net/ethernet/sfc/siena/ethtool.c index 14dd3893bdef..4c182d4edfc2 100644 --- a/drivers/net/ethernet/sfc/siena/ethtool.c +++ b/drivers/net/ethernet/sfc/siena/ethtool.c @@ -226,7 +226,7 @@ static void efx_ethtool_get_fec_stats(struct net_device *net_dev, } static int efx_ethtool_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct efx_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/siena/ptp.c b/drivers/net/ethernet/sfc/siena/ptp.c index 4b5e2f0ba350..c473a4b6dd44 100644 --- a/drivers/net/ethernet/sfc/siena/ptp.c +++ b/drivers/net/ethernet/sfc/siena/ptp.c @@ -1780,7 +1780,7 @@ static int efx_ptp_ts_init(struct efx_nic *efx, } void efx_siena_ptp_get_ts_info(struct efx_nic *efx, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct efx_ptp_data *ptp = efx->ptp_data; struct efx_nic *primary = efx->primary; diff --git a/drivers/net/ethernet/sfc/siena/ptp.h b/drivers/net/ethernet/sfc/siena/ptp.h index 6352f84424f6..b6133e7c5608 100644 --- a/drivers/net/ethernet/sfc/siena/ptp.h +++ b/drivers/net/ethernet/sfc/siena/ptp.h @@ -12,7 +12,7 @@ #include #include "net_driver.h" -struct ethtool_ts_info; +struct kernel_ethtool_ts_info; void efx_siena_ptp_defer_probe_with_channel(struct efx_nic *efx); struct efx_channel *efx_siena_ptp_channel(struct efx_nic *efx); int efx_siena_ptp_set_ts_config(struct efx_nic *efx, @@ -21,7 +21,7 @@ int efx_siena_ptp_set_ts_config(struct efx_nic *efx, int efx_siena_ptp_get_ts_config(struct efx_nic *efx, struct kernel_hwtstamp_config *config); void efx_siena_ptp_get_ts_info(struct efx_nic *efx, - struct ethtool_ts_info *ts_info); + struct kernel_ethtool_ts_info *ts_info); bool efx_siena_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); int efx_siena_ptp_get_mode(struct efx_nic *efx); int efx_siena_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 18468c0228f0..7008219fd88d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -1199,7 +1199,7 @@ static int stmmac_set_channels(struct net_device *dev, } static int stmmac_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct stmmac_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index a1d0935d1ebe..b60976947da5 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -692,7 +692,7 @@ static void am65_cpsw_get_eth_mac_stats(struct net_device *ndev, }; static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); unsigned int ptp_v2_filter; diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index f7b283353ba2..53ed23d68722 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -717,7 +717,7 @@ err: } #if IS_ENABLED(CONFIG_TI_CPTS) -int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) +int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { struct cpsw_common *cpsw = ndev_to_cpsw(ndev); @@ -738,7 +738,7 @@ int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) return 0; } #else -int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) +int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 7efa72502c86..1f448290b9f4 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -510,6 +510,6 @@ int cpsw_set_ringparam(struct net_device *ndev, int cpsw_set_channels_common(struct net_device *ndev, struct ethtool_channels *chs, cpdma_handler_fn rx_handler); -int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info); +int cpsw_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info); #endif /* DRIVERS_NET_ETHERNET_TI_CPSW_PRIV_H_ */ diff --git a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c index c8d0f45cc5b1..524723ced9ed 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_ethtool.c +++ b/drivers/net/ethernet/ti/icssg/icssg_ethtool.c @@ -110,7 +110,7 @@ static void emac_get_ethtool_stats(struct net_device *ndev, } static int emac_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct prueth_emac *emac = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 02cb6474f6dc..d286709ca3b9 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -1999,7 +1999,7 @@ static int keystone_set_link_ksettings(struct net_device *ndev, #if IS_ENABLED(CONFIG_TI_CPTS) static int keystone_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct netcp_intf *netcp = netdev_priv(ndev); struct gbe_intf *gbe_intf; @@ -2027,7 +2027,7 @@ static int keystone_get_ts_info(struct net_device *ndev, } #else static int keystone_get_ts_info(struct net_device *ndev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 8aff6a73ca0a..56df37f8d50a 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -1015,7 +1015,7 @@ static void ixp4xx_get_drvinfo(struct net_device *dev, } static int ixp4xx_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct port *port = netdev_priv(dev); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 67b7ef2d463f..24298a33e0e9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1084,7 +1084,7 @@ static int macvlan_ethtool_get_link_ksettings(struct net_device *dev, } static int macvlan_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct net_device *real_dev = macvlan_dev_real_dev(dev); diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 3f9c9327f149..1436905bc106 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -148,7 +148,7 @@ nsim_get_fec_stats(struct net_device *dev, struct ethtool_fec_stats *fec_stats) } static int nsim_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct netdevsim *ns = netdev_priv(dev); diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index d3e825c951ee..874a1b64b115 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -841,7 +841,7 @@ static int bcm_ptp_hwtstamp(struct mii_timestamper *mii_ts, } static int bcm_ptp_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct bcm_ptp_private *priv = mii2priv(mii_ts); diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index d3e72d5c1472..075d2beea716 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1395,7 +1395,7 @@ static void dp83640_txtstamp(struct mii_timestamper *mii_ts, } static int dp83640_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct dp83640_private *dp83640 = container_of(mii_ts, struct dp83640_private, mii_ts); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 8d57225d8575..dd519805deee 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -2552,7 +2552,7 @@ static void lan8814_ptp_tx_ts_get(struct phy_device *phydev, *seq_id = lanphy_read_page_reg(phydev, 5, PTP_TX_MSG_HEADER2); } -static int lan8814_ts_info(struct mii_timestamper *mii_ts, struct ethtool_ts_info *info) +static int lan8814_ts_info(struct mii_timestamper *mii_ts, struct kernel_ethtool_ts_info *info) { struct kszphy_ptp_priv *ptp_priv = container_of(mii_ts, struct kszphy_ptp_priv, mii_ts); struct phy_device *phydev = ptp_priv->phydev; @@ -4317,7 +4317,7 @@ static irqreturn_t lan8841_handle_interrupt(struct phy_device *phydev) } static int lan8841_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct kszphy_ptp_priv *ptp_priv; diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index e66d20eff7c4..c1ddae36a2ae 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1134,7 +1134,7 @@ static int vsc85xx_hwtstamp(struct mii_timestamper *mii_ts, } static int vsc85xx_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct vsc8531_private *vsc8531 = container_of(mii_ts, struct vsc8531_private, mii_ts); diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index d18c133e6013..5af5ade4fc64 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1058,7 +1058,7 @@ nxp_c45_no_ptp_irq: } static int nxp_c45_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *ts_info) + struct kernel_ethtool_ts_info *ts_info) { struct nxp_c45_phy *priv = container_of(mii_ts, struct nxp_c45_phy, mii_ts); diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index 385643f3f8fe..e6f7d2bf8dde 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -556,7 +556,7 @@ static bool ines_timestamp_expired(struct ines_timestamp *ts) } static int ines_ts_info(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | diff --git a/drivers/s390/net/qeth_ethtool.c b/drivers/s390/net/qeth_ethtool.c index c1caf7734c3e..f184c58ecf24 100644 --- a/drivers/s390/net/qeth_ethtool.c +++ b/drivers/s390/net/qeth_ethtool.c @@ -247,7 +247,7 @@ static int qeth_set_channels(struct net_device *dev, } static int qeth_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { struct qeth_card *card = dev->ml_priv; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 1b92aed49363..23492213ea35 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -186,7 +186,7 @@ void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int register_candev(struct net_device *dev); void unregister_candev(struct net_device *dev); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e213b5508da6..6b38bbda5790 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -18,6 +18,7 @@ #include #include #include +#include struct compat_ethtool_rx_flow_spec { u32 flow_type; @@ -713,6 +714,22 @@ struct ethtool_rxfh_param { u8 input_xfrm; }; +/** + * struct kernel_ethtool_ts_info - kernel copy of struct ethtool_ts_info + * @cmd: command number = %ETHTOOL_GET_TS_INFO + * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags + * @phc_index: device index of the associated PHC, or -1 if there is none + * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + */ +struct kernel_ethtool_ts_info { + u32 cmd; + u32 so_timestamping; + int phc_index; + enum hwtstamp_tx_types tx_types; + enum hwtstamp_rx_filters rx_filters; +}; + /** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes @@ -1020,7 +1037,7 @@ struct ethtool_ops { int (*get_dump_data)(struct net_device *, struct ethtool_dump *, void *); int (*set_dump)(struct net_device *, struct ethtool_dump *); - int (*get_ts_info)(struct net_device *, struct ethtool_ts_info *); + int (*get_ts_info)(struct net_device *, struct kernel_ethtool_ts_info *); void (*get_ts_stats)(struct net_device *dev, struct ethtool_ts_stats *ts_stats); int (*get_module_info)(struct net_device *, @@ -1181,7 +1198,8 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); +int ethtool_op_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *eti); /** * ethtool_mm_frag_size_add_to_min - Translate (standard) additional fragment @@ -1230,7 +1248,8 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, * @info: buffer to hold the result * Returns zero on success, non-zero otherwise. */ -int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info); +int ethtool_get_ts_info_by_layer(struct net_device *dev, + struct kernel_ethtool_ts_info *info); /** * ethtool_sprintf - Write formatted string to ethtool string data diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index 26b04f73f214..995db62570f9 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -59,7 +59,7 @@ struct mii_timestamper { struct phy_device *phydev); int (*ts_info)(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *ts_info); + struct kernel_ethtool_ts_info *ts_info); struct device *device; }; diff --git a/include/linux/phy.h b/include/linux/phy.h index e7a38137211c..04ae5c811cfb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1618,7 +1618,7 @@ static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, } static inline int phy_ts_info(struct phy_device *phydev, - struct ethtool_ts_info *tsinfo) + struct kernel_ethtool_ts_info *tsinfo) { return phydev->mii_ts->ts_info(phydev->mii_ts, tsinfo); } diff --git a/include/net/dsa.h b/include/net/dsa.h index f9ae3ca66b6f..96efdd9f90c9 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -934,7 +934,7 @@ struct dsa_switch_ops { * ethtool timestamp info */ int (*get_ts_info)(struct dsa_switch *ds, int port, - struct ethtool_ts_info *ts); + struct kernel_ethtool_ts_info *ts); /* * ethtool MAC merge layer diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1e1b40f4e664..6a37b29f4b4c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -1016,7 +1016,7 @@ void ocelot_port_get_eth_mac_stats(struct ocelot *ocelot, int port, void ocelot_port_get_eth_phy_stats(struct ocelot *ocelot, int port, struct ethtool_eth_phy_stats *phy_stats); int ocelot_get_ts_info(struct ocelot *ocelot, int port, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled, struct netlink_ext_ack *extack); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3efba4f857ac..217be32426b5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -677,7 +677,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev, } static int vlan_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) + struct kernel_ethtool_ts_info *info) { const struct vlan_dev_priv *vlan = vlan_dev_priv(dev); return ethtool_get_ts_info_by_layer(vlan->real_dev, info); diff --git a/net/dsa/user.c b/net/dsa/user.c index e8f56a40b614..f5adfa1d978a 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1729,7 +1729,7 @@ static int dsa_user_set_rxnfc(struct net_device *dev, } static int dsa_user_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *ts) + struct kernel_ethtool_ts_info *ts) { struct dsa_user_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 01b7550f12c6..461017a37955 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -629,7 +629,7 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } -int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; @@ -651,7 +651,7 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { - struct ethtool_ts_info info = { }; + struct kernel_ethtool_ts_info info = { }; int num = 0; if (!__ethtool_get_ts_info(dev, &info)) @@ -661,7 +661,7 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) } EXPORT_SYMBOL(ethtool_get_phc_vclocks); -int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info) +int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info) { return __ethtool_get_ts_info(dev, info); } diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 28b8aaaf9bcb..b9daeecbd84d 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -44,7 +44,7 @@ bool convert_legacy_settings_to_link_ksettings( const struct ethtool_cmd *legacy_settings); int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max); int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max); -int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info); +int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); extern const struct ethtool_phy_ops *ethtool_phy_ops; extern const struct ethtool_pse_ops *ethtool_pse_ops; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index d72b0fec89af..bc8988ddf51c 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -65,7 +65,8 @@ u32 ethtool_op_get_link(struct net_device *dev) } EXPORT_SYMBOL(ethtool_op_get_link); -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) +int ethtool_op_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *info) { info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | @@ -2569,13 +2570,20 @@ out: static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) { - struct ethtool_ts_info info; + struct kernel_ethtool_ts_info kernel_info; + struct ethtool_ts_info info = {}; int err; - err = __ethtool_get_ts_info(dev, &info); + err = __ethtool_get_ts_info(dev, &kernel_info); if (err) return err; + info.cmd = kernel_info.cmd; + info.so_timestamping = kernel_info.so_timestamping; + info.phc_index = kernel_info.phc_index; + info.tx_types = kernel_info.tx_types; + info.rx_filters = kernel_info.rx_filters; + if (copy_to_user(useraddr, &info, sizeof(info))) return -EFAULT; diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 57d496287e52..03d12d6f79ca 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -12,7 +12,7 @@ struct tsinfo_req_info { struct tsinfo_reply_data { struct ethnl_reply_data base; - struct ethtool_ts_info ts_info; + struct kernel_ethtool_ts_info ts_info; struct ethtool_ts_stats stats; }; @@ -55,7 +55,7 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base, { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; - const struct ethtool_ts_info *ts_info = &data->ts_info; + const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int len = 0; int ret; @@ -136,7 +136,7 @@ static int tsinfo_fill_reply(struct sk_buff *skb, { const struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; - const struct ethtool_ts_info *ts_info = &data->ts_info; + const struct kernel_ethtool_ts_info *ts_info = &data->ts_info; int ret; if (ts_info->so_timestamping) { diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index b284a06b5a75..cc2df9f8c14a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1610,7 +1610,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_ts_info info = { + struct kernel_ethtool_ts_info info = { .cmd = ETHTOOL_GET_TS_INFO, .phc_index = -1, }; -- cgit v1.2.3 From f96eb1172ed8d74cfed7da92d2045ec64028cc38 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 15 Jul 2024 14:30:50 +0200 Subject: dsa: lan9303: consistent naming for PHY address parameter Name it 'addr' instead of 'port' or 'phy'. Signed-off-by: Christian Eggers Link: https://patch.msgid.link/20240715123050.21202-1-ceggers@arri.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lan9303_mdio.c | 8 ++++---- include/linux/dsa/lan9303.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index 167a86f39f27..0ac4857e5ee8 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -58,19 +58,19 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) return 0; } -static int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, +static int lan9303_mdio_phy_write(struct lan9303 *chip, int addr, int reg, u16 val) { struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); - return mdiobus_write_nested(sw_dev->device->bus, phy, reg, val); + return mdiobus_write_nested(sw_dev->device->bus, addr, reg, val); } -static int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg) +static int lan9303_mdio_phy_read(struct lan9303 *chip, int addr, int reg) { struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); - return mdiobus_read_nested(sw_dev->device->bus, phy, reg); + return mdiobus_read_nested(sw_dev->device->bus, addr, reg); } static const struct lan9303_phy_ops lan9303_mdio_phy_ops = { diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index b4f22112ba75..3ce7cbcc37a3 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -5,8 +5,8 @@ struct lan9303; struct lan9303_phy_ops { /* PHY 1 and 2 access*/ - int (*phy_read)(struct lan9303 *chip, int port, int regnum); - int (*phy_write)(struct lan9303 *chip, int port, + int (*phy_read)(struct lan9303 *chip, int addr, int regnum); + int (*phy_write)(struct lan9303 *chip, int addr, int regnum, u16 val); }; -- cgit v1.2.3 From 70de41ef78573ce958ac04ecc2b5671851723c59 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Jul 2024 18:05:56 +0200 Subject: llc: Constify struct llc_conn_state_trans 'struct llc_conn_state_trans' are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 13923 10896 32 24851 6113 net/llc/llc_c_st.o After: ===== text data bss dec hex filename 21859 3328 0 25187 6263 net/llc/llc_c_st.o Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/87cda89e4c9414e71d1a54bb1eb491b0e7f70375.1720973029.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/net/llc_c_st.h | 4 +- net/llc/llc_c_st.c | 500 ++++++++++++++++++++++++------------------------- net/llc/llc_conn.c | 20 +- 3 files changed, 262 insertions(+), 262 deletions(-) (limited to 'include') diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h index 53823d61d8b6..a4bea0f33188 100644 --- a/include/net/llc_c_st.h +++ b/include/net/llc_c_st.h @@ -44,8 +44,8 @@ struct llc_conn_state_trans { }; struct llc_conn_state { - u8 current_state; - struct llc_conn_state_trans **transitions; + u8 current_state; + const struct llc_conn_state_trans **transitions; }; extern struct llc_conn_state llc_conn_state_table[]; diff --git a/net/llc/llc_c_st.c b/net/llc/llc_c_st.c index 2467573b5f84..1c267db304df 100644 --- a/net/llc/llc_c_st.c +++ b/net/llc/llc_c_st.c @@ -42,7 +42,7 @@ static const llc_conn_action_t llc_common_actions_1[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_1 = { +static const struct llc_conn_state_trans llc_common_state_trans_1 = { .ev = llc_conn_ev_disc_req, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = NONE, @@ -59,7 +59,7 @@ static const llc_conn_action_t llc_common_actions_2[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_2 = { +static const struct llc_conn_state_trans llc_common_state_trans_2 = { .ev = llc_conn_ev_rst_req, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -79,7 +79,7 @@ static const llc_conn_action_t llc_common_actions_3[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_3 = { +static const struct llc_conn_state_trans llc_common_state_trans_3 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -95,7 +95,7 @@ static const llc_conn_action_t llc_common_actions_4[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_4 = { +static const struct llc_conn_state_trans llc_common_state_trans_4 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -114,7 +114,7 @@ static const llc_conn_action_t llc_common_actions_5[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_5 = { +static const struct llc_conn_state_trans llc_common_state_trans_5 = { .ev = llc_conn_ev_rx_frmr_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -129,7 +129,7 @@ static const llc_conn_action_t llc_common_actions_6[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_6 = { +static const struct llc_conn_state_trans llc_common_state_trans_6 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -145,7 +145,7 @@ static const llc_conn_action_t llc_common_actions_7a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_7a = { +static const struct llc_conn_state_trans llc_common_state_trans_7a = { .ev = llc_conn_ev_rx_zzz_cmd_pbit_set_x_inval_nr, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -161,7 +161,7 @@ static const llc_conn_action_t llc_common_actions_7b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_7b = { +static const struct llc_conn_state_trans llc_common_state_trans_7b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_x_inval_ns, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -177,7 +177,7 @@ static const llc_conn_action_t llc_common_actions_8a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_8a = { +static const struct llc_conn_state_trans llc_common_state_trans_8a = { .ev = llc_conn_ev_rx_zzz_rsp_fbit_set_x_inval_nr, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -193,7 +193,7 @@ static const llc_conn_action_t llc_common_actions_8b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_8b = { +static const struct llc_conn_state_trans llc_common_state_trans_8b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x_inval_ns, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -209,7 +209,7 @@ static const llc_conn_action_t llc_common_actions_8c[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_8c = { +static const struct llc_conn_state_trans llc_common_state_trans_8c = { .ev = llc_conn_ev_rx_bad_pdu, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -225,7 +225,7 @@ static const llc_conn_action_t llc_common_actions_9[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_9 = { +static const struct llc_conn_state_trans llc_common_state_trans_9 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -247,7 +247,7 @@ static const llc_conn_action_t llc_common_actions_10[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_10 = { +static const struct llc_conn_state_trans llc_common_state_trans_10 = { .ev = llc_conn_ev_rx_xxx_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = llc_common_ev_qfyrs_10, @@ -270,7 +270,7 @@ static const llc_conn_action_t llc_common_actions_11a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11a = { +static const struct llc_conn_state_trans llc_common_state_trans_11a = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11a, @@ -292,7 +292,7 @@ static const llc_conn_action_t llc_common_actions_11b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11b = { +static const struct llc_conn_state_trans llc_common_state_trans_11b = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11b, @@ -314,7 +314,7 @@ static const llc_conn_action_t llc_common_actions_11c[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11c = { +static const struct llc_conn_state_trans llc_common_state_trans_11c = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11c, @@ -336,7 +336,7 @@ static const llc_conn_action_t llc_common_actions_11d[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_common_state_trans_11d = { +static const struct llc_conn_state_trans llc_common_state_trans_11d = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_common_ev_qfyrs_11d, @@ -347,7 +347,7 @@ static struct llc_conn_state_trans llc_common_state_trans_11d = { * Common dummy state transition; must be last entry for all state * transition groups - it'll be on .bss, so will be zeroed. */ -static struct llc_conn_state_trans llc_common_state_trans_end; +static const struct llc_conn_state_trans llc_common_state_trans_end; /* LLC_CONN_STATE_ADM transitions */ /* State transitions for LLC_CONN_EV_CONN_REQ event */ @@ -359,7 +359,7 @@ static const llc_conn_action_t llc_adm_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_1 = { +static const struct llc_conn_state_trans llc_adm_state_trans_1 = { .ev = llc_conn_ev_conn_req, .next_state = LLC_CONN_STATE_SETUP, .ev_qualifiers = NONE, @@ -378,7 +378,7 @@ static const llc_conn_action_t llc_adm_actions_2[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_2 = { +static const struct llc_conn_state_trans llc_adm_state_trans_2 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -392,7 +392,7 @@ static const llc_conn_action_t llc_adm_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_3 = { +static const struct llc_conn_state_trans llc_adm_state_trans_3 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -406,7 +406,7 @@ static const llc_conn_action_t llc_adm_actions_4[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_4 = { +static const struct llc_conn_state_trans llc_adm_state_trans_4 = { .ev = llc_conn_ev_rx_xxx_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -419,7 +419,7 @@ static const llc_conn_action_t llc_adm_actions_5[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_adm_state_trans_5 = { +static const struct llc_conn_state_trans llc_adm_state_trans_5 = { .ev = llc_conn_ev_rx_any_frame, .next_state = LLC_CONN_OUT_OF_SVC, .ev_qualifiers = NONE, @@ -430,7 +430,7 @@ static struct llc_conn_state_trans llc_adm_state_trans_5 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_adm_state_transitions[] = { +static const struct llc_conn_state_trans *llc_adm_state_transitions[] = { [0] = &llc_adm_state_trans_1, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* local_busy */ @@ -453,7 +453,7 @@ static const llc_conn_action_t llc_setup_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_1 = { +static const struct llc_conn_state_trans llc_setup_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_SETUP, .ev_qualifiers = NONE, @@ -477,7 +477,7 @@ static const llc_conn_action_t llc_setup_actions_2[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_2 = { +static const struct llc_conn_state_trans llc_setup_state_trans_2 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_setup_ev_qfyrs_2, @@ -498,7 +498,7 @@ static const llc_conn_action_t llc_setup_actions_3[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_3 = { +static const struct llc_conn_state_trans llc_setup_state_trans_3 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_setup_ev_qfyrs_3, @@ -519,7 +519,7 @@ static const llc_conn_action_t llc_setup_actions_4[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_4 = { +static const struct llc_conn_state_trans llc_setup_state_trans_4 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_setup_ev_qfyrs_4, @@ -539,7 +539,7 @@ static const llc_conn_action_t llc_setup_actions_5[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_5 = { +static const struct llc_conn_state_trans llc_setup_state_trans_5 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_setup_ev_qfyrs_5, @@ -560,7 +560,7 @@ static const llc_conn_action_t llc_setup_actions_7[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_7 = { +static const struct llc_conn_state_trans llc_setup_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_SETUP, .ev_qualifiers = llc_setup_ev_qfyrs_7, @@ -581,7 +581,7 @@ static const llc_conn_action_t llc_setup_actions_8[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_setup_state_trans_8 = { +static const struct llc_conn_state_trans llc_setup_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_setup_ev_qfyrs_8, @@ -592,7 +592,7 @@ static struct llc_conn_state_trans llc_setup_state_trans_8 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_setup_state_transitions[] = { +static const struct llc_conn_state_trans *llc_setup_state_transitions[] = { [0] = &llc_common_state_trans_end, /* Request */ [1] = &llc_common_state_trans_end, /* local busy */ [2] = &llc_common_state_trans_end, /* init_pf_cycle */ @@ -622,7 +622,7 @@ static const llc_conn_action_t llc_normal_actions_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_1 = { +static const struct llc_conn_state_trans llc_normal_state_trans_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_1, @@ -643,7 +643,7 @@ static const llc_conn_action_t llc_normal_actions_2[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_2 = { +static const struct llc_conn_state_trans llc_normal_state_trans_2 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_2, @@ -660,7 +660,7 @@ static const llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_2_1[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_normal_actions_2_1[1]; -static struct llc_conn_state_trans llc_normal_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_normal_state_trans_2_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_2_1, @@ -680,7 +680,7 @@ static const llc_conn_action_t llc_normal_actions_3[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_3 = { +static const struct llc_conn_state_trans llc_normal_state_trans_3 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_normal_ev_qfyrs_3, @@ -700,7 +700,7 @@ static const llc_conn_action_t llc_normal_actions_4[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_4 = { +static const struct llc_conn_state_trans llc_normal_state_trans_4 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_normal_ev_qfyrs_4, @@ -723,7 +723,7 @@ static const llc_conn_action_t llc_normal_actions_5a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_5a = { +static const struct llc_conn_state_trans llc_normal_state_trans_5a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_5a, @@ -746,7 +746,7 @@ static const llc_conn_action_t llc_normal_actions_5b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_5b = { +static const struct llc_conn_state_trans llc_normal_state_trans_5b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_5b, @@ -769,7 +769,7 @@ static const llc_conn_action_t llc_normal_actions_5c[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_5c = { +static const struct llc_conn_state_trans llc_normal_state_trans_5c = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_5c, @@ -790,7 +790,7 @@ static const llc_conn_action_t llc_normal_actions_6a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_6a = { +static const struct llc_conn_state_trans llc_normal_state_trans_6a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_6a, @@ -811,7 +811,7 @@ static const llc_conn_action_t llc_normal_actions_6b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_6b = { +static const struct llc_conn_state_trans llc_normal_state_trans_6b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_normal_ev_qfyrs_6b, @@ -827,7 +827,7 @@ static const llc_conn_action_t llc_normal_actions_7[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_7 = { +static const struct llc_conn_state_trans llc_normal_state_trans_7 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -850,7 +850,7 @@ static const llc_conn_action_t llc_normal_actions_8[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_8a = { +static const struct llc_conn_state_trans llc_normal_state_trans_8a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_8a, @@ -863,7 +863,7 @@ static const llc_conn_ev_qfyr_t llc_normal_ev_qfyrs_8b[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_8b = { +static const struct llc_conn_state_trans llc_normal_state_trans_8b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_8b, @@ -884,7 +884,7 @@ static const llc_conn_action_t llc_normal_actions_9a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_9a = { +static const struct llc_conn_state_trans llc_normal_state_trans_9a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_9a, @@ -905,7 +905,7 @@ static const llc_conn_action_t llc_normal_actions_9b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_9b = { +static const struct llc_conn_state_trans llc_normal_state_trans_9b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_9b, @@ -922,7 +922,7 @@ static const llc_conn_action_t llc_normal_actions_10[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_10 = { +static const struct llc_conn_state_trans llc_normal_state_trans_10 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -937,7 +937,7 @@ static const llc_conn_action_t llc_normal_actions_11a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_11a = { +static const struct llc_conn_state_trans llc_normal_state_trans_11a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -952,7 +952,7 @@ static const llc_conn_action_t llc_normal_actions_11b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_11b = { +static const struct llc_conn_state_trans llc_normal_state_trans_11b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -973,7 +973,7 @@ static const llc_conn_action_t llc_normal_actions_11c[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_11c = { +static const struct llc_conn_state_trans llc_normal_state_trans_11c = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_11c, @@ -990,7 +990,7 @@ static const llc_conn_action_t llc_normal_actions_12[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_12 = { +static const struct llc_conn_state_trans llc_normal_state_trans_12 = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1005,7 +1005,7 @@ static const llc_conn_action_t llc_normal_actions_13a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_13a = { +static const struct llc_conn_state_trans llc_normal_state_trans_13a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1020,7 +1020,7 @@ static const llc_conn_action_t llc_normal_actions_13b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_13b = { +static const struct llc_conn_state_trans llc_normal_state_trans_13b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1040,7 +1040,7 @@ static const llc_conn_action_t llc_normal_actions_13c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_13c = { +static const struct llc_conn_state_trans llc_normal_state_trans_13c = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_13c, @@ -1057,7 +1057,7 @@ static const llc_conn_action_t llc_normal_actions_14[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_14 = { +static const struct llc_conn_state_trans llc_normal_state_trans_14 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1080,7 +1080,7 @@ static const llc_conn_action_t llc_normal_actions_15a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_15a = { +static const struct llc_conn_state_trans llc_normal_state_trans_15a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_15a, @@ -1103,7 +1103,7 @@ static const llc_conn_action_t llc_normal_actions_15b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_15b = { +static const struct llc_conn_state_trans llc_normal_state_trans_15b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_15b, @@ -1125,7 +1125,7 @@ static const llc_conn_action_t llc_normal_actions_16a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_16a = { +static const struct llc_conn_state_trans llc_normal_state_trans_16a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_16a, @@ -1147,7 +1147,7 @@ static const llc_conn_action_t llc_normal_actions_16b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_16b = { +static const struct llc_conn_state_trans llc_normal_state_trans_16b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_16b, @@ -1164,7 +1164,7 @@ static const llc_conn_action_t llc_normal_actions_17[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_17 = { +static const struct llc_conn_state_trans llc_normal_state_trans_17 = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -1183,7 +1183,7 @@ static const llc_conn_action_t llc_normal_actions_18[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_18 = { +static const struct llc_conn_state_trans llc_normal_state_trans_18 = { .ev = llc_conn_ev_init_p_f_cycle, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_18, @@ -1205,7 +1205,7 @@ static const llc_conn_action_t llc_normal_actions_19[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_19 = { +static const struct llc_conn_state_trans llc_normal_state_trans_19 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_normal_ev_qfyrs_19, @@ -1228,7 +1228,7 @@ static const llc_conn_action_t llc_normal_actions_20a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_20a = { +static const struct llc_conn_state_trans llc_normal_state_trans_20a = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_normal_ev_qfyrs_20a, @@ -1251,7 +1251,7 @@ static const llc_conn_action_t llc_normal_actions_20b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_20b = { +static const struct llc_conn_state_trans llc_normal_state_trans_20b = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_normal_ev_qfyrs_20b, @@ -1270,7 +1270,7 @@ static const llc_conn_action_t llc_normal_actions_21[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_normal_state_trans_21 = { +static const struct llc_conn_state_trans llc_normal_state_trans_21 = { .ev = llc_conn_ev_tx_buffer_full, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_normal_ev_qfyrs_21, @@ -1281,7 +1281,7 @@ static struct llc_conn_state_trans llc_normal_state_trans_21 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_normal_state_transitions[] = { +static const struct llc_conn_state_trans *llc_normal_state_transitions[] = { [0] = &llc_normal_state_trans_1, /* Requests */ [1] = &llc_normal_state_trans_2, [2] = &llc_normal_state_trans_2_1, @@ -1354,7 +1354,7 @@ static const llc_conn_action_t llc_busy_actions_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_1 = { +static const struct llc_conn_state_trans llc_busy_state_trans_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_1, @@ -1374,7 +1374,7 @@ static const llc_conn_action_t llc_busy_actions_2[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_2 = { +static const struct llc_conn_state_trans llc_busy_state_trans_2 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_2, @@ -1391,7 +1391,7 @@ static const llc_conn_ev_qfyr_t llc_busy_ev_qfyrs_2_1[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_busy_actions_2_1[1]; -static struct llc_conn_state_trans llc_busy_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_busy_state_trans_2_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_2_1, @@ -1411,7 +1411,7 @@ static const llc_conn_action_t llc_busy_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_3 = { +static const struct llc_conn_state_trans llc_busy_state_trans_3 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_3, @@ -1431,7 +1431,7 @@ static const llc_conn_action_t llc_busy_actions_4[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_4 = { +static const struct llc_conn_state_trans llc_busy_state_trans_4 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_4, @@ -1450,7 +1450,7 @@ static const llc_conn_action_t llc_busy_actions_5[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_5 = { +static const struct llc_conn_state_trans llc_busy_state_trans_5 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_busy_ev_qfyrs_5, @@ -1469,7 +1469,7 @@ static const llc_conn_action_t llc_busy_actions_6[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_6 = { +static const struct llc_conn_state_trans llc_busy_state_trans_6 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_busy_ev_qfyrs_6, @@ -1488,7 +1488,7 @@ static const llc_conn_action_t llc_busy_actions_7[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_7 = { +static const struct llc_conn_state_trans llc_busy_state_trans_7 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_7, @@ -1507,7 +1507,7 @@ static const llc_conn_action_t llc_busy_actions_8[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_8 = { +static const struct llc_conn_state_trans llc_busy_state_trans_8 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_busy_ev_qfyrs_8, @@ -1529,7 +1529,7 @@ static const llc_conn_action_t llc_busy_actions_9a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_9a = { +static const struct llc_conn_state_trans llc_busy_state_trans_9a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_9a, @@ -1551,7 +1551,7 @@ static const llc_conn_action_t llc_busy_actions_9b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_9b = { +static const struct llc_conn_state_trans llc_busy_state_trans_9b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_9b, @@ -1571,7 +1571,7 @@ static const llc_conn_action_t llc_busy_actions_10a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_10a = { +static const struct llc_conn_state_trans llc_busy_state_trans_10a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_10a, @@ -1591,7 +1591,7 @@ static const llc_conn_action_t llc_busy_actions_10b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_10b = { +static const struct llc_conn_state_trans llc_busy_state_trans_10b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_10b, @@ -1606,7 +1606,7 @@ static const llc_conn_action_t llc_busy_actions_11[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_11 = { +static const struct llc_conn_state_trans llc_busy_state_trans_11 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1624,7 +1624,7 @@ static const llc_conn_action_t llc_busy_actions_12[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_12 = { +static const struct llc_conn_state_trans llc_busy_state_trans_12 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1649,7 +1649,7 @@ static const llc_conn_action_t llc_busy_actions_13a[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_13a = { +static const struct llc_conn_state_trans llc_busy_state_trans_13a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_13a, @@ -1674,7 +1674,7 @@ static const llc_conn_action_t llc_busy_actions_13b[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_13b = { +static const struct llc_conn_state_trans llc_busy_state_trans_13b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_13b, @@ -1697,7 +1697,7 @@ static const llc_conn_action_t llc_busy_actions_14a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_14a = { +static const struct llc_conn_state_trans llc_busy_state_trans_14a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_14a, @@ -1720,7 +1720,7 @@ static const llc_conn_action_t llc_busy_actions_14b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_14b = { +static const struct llc_conn_state_trans llc_busy_state_trans_14b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_14b, @@ -1735,7 +1735,7 @@ static const llc_conn_action_t llc_busy_actions_15a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_15a = { +static const struct llc_conn_state_trans llc_busy_state_trans_15a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1750,7 +1750,7 @@ static const llc_conn_action_t llc_busy_actions_15b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_15b = { +static const struct llc_conn_state_trans llc_busy_state_trans_15b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1770,7 +1770,7 @@ static const llc_conn_action_t llc_busy_actions_15c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_15c = { +static const struct llc_conn_state_trans llc_busy_state_trans_15c = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_15c, @@ -1785,7 +1785,7 @@ static const llc_conn_action_t llc_busy_actions_16[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_16 = { +static const struct llc_conn_state_trans llc_busy_state_trans_16 = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1800,7 +1800,7 @@ static const llc_conn_action_t llc_busy_actions_17a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_17a = { +static const struct llc_conn_state_trans llc_busy_state_trans_17a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1815,7 +1815,7 @@ static const llc_conn_action_t llc_busy_actions_17b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_17b = { +static const struct llc_conn_state_trans llc_busy_state_trans_17b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1835,7 +1835,7 @@ static const llc_conn_action_t llc_busy_actions_17c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_17c = { +static const struct llc_conn_state_trans llc_busy_state_trans_17c = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_17c, @@ -1850,7 +1850,7 @@ static const llc_conn_action_t llc_busy_actions_18[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_18 = { +static const struct llc_conn_state_trans llc_busy_state_trans_18 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1872,7 +1872,7 @@ static const llc_conn_action_t llc_busy_actions_19a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_19a = { +static const struct llc_conn_state_trans llc_busy_state_trans_19a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_19a, @@ -1894,7 +1894,7 @@ static const llc_conn_action_t llc_busy_actions_19b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_19b = { +static const struct llc_conn_state_trans llc_busy_state_trans_19b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_19b, @@ -1915,7 +1915,7 @@ static const llc_conn_action_t llc_busy_actions_20a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_20a = { +static const struct llc_conn_state_trans llc_busy_state_trans_20a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_20a, @@ -1936,7 +1936,7 @@ static const llc_conn_action_t llc_busy_actions_20b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_20b = { +static const struct llc_conn_state_trans llc_busy_state_trans_20b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_20b, @@ -1953,7 +1953,7 @@ static const llc_conn_action_t llc_busy_actions_21[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_21 = { +static const struct llc_conn_state_trans llc_busy_state_trans_21 = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -1972,7 +1972,7 @@ static const llc_conn_action_t llc_busy_actions_22[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_22 = { +static const struct llc_conn_state_trans llc_busy_state_trans_22 = { .ev = llc_conn_ev_init_p_f_cycle, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_22, @@ -1993,7 +1993,7 @@ static const llc_conn_action_t llc_busy_actions_23[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_23 = { +static const struct llc_conn_state_trans llc_busy_state_trans_23 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_23, @@ -2015,7 +2015,7 @@ static const llc_conn_action_t llc_busy_actions_24a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_24a = { +static const struct llc_conn_state_trans llc_busy_state_trans_24a = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_24a, @@ -2037,7 +2037,7 @@ static const llc_conn_action_t llc_busy_actions_24b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_24b = { +static const struct llc_conn_state_trans llc_busy_state_trans_24b = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_24b, @@ -2060,7 +2060,7 @@ static const llc_conn_action_t llc_busy_actions_25[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_25 = { +static const struct llc_conn_state_trans llc_busy_state_trans_25 = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_25, @@ -2079,7 +2079,7 @@ static const llc_conn_action_t llc_busy_actions_26[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_busy_state_trans_26 = { +static const struct llc_conn_state_trans llc_busy_state_trans_26 = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_busy_ev_qfyrs_26, @@ -2090,7 +2090,7 @@ static struct llc_conn_state_trans llc_busy_state_trans_26 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_busy_state_transitions[] = { +static const struct llc_conn_state_trans *llc_busy_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_busy_state_trans_1, @@ -2166,7 +2166,7 @@ static const llc_conn_action_t llc_reject_actions_1[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_1 = { +static const struct llc_conn_state_trans llc_reject_state_trans_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_1, @@ -2185,7 +2185,7 @@ static const llc_conn_action_t llc_reject_actions_2[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_2 = { +static const struct llc_conn_state_trans llc_reject_state_trans_2 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_2, @@ -2202,7 +2202,7 @@ static const llc_conn_ev_qfyr_t llc_reject_ev_qfyrs_2_1[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_reject_actions_2_1[1]; -static struct llc_conn_state_trans llc_reject_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_reject_state_trans_2_1 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_2_1, @@ -2222,7 +2222,7 @@ static const llc_conn_action_t llc_reject_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_3 = { +static const struct llc_conn_state_trans llc_reject_state_trans_3 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_reject_ev_qfyrs_3, @@ -2241,7 +2241,7 @@ static const llc_conn_action_t llc_reject_actions_4[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_4 = { +static const struct llc_conn_state_trans llc_reject_state_trans_4 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = llc_reject_ev_qfyrs_4, @@ -2256,7 +2256,7 @@ static const llc_conn_action_t llc_reject_actions_5a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_5a = { +static const struct llc_conn_state_trans llc_reject_state_trans_5a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2271,7 +2271,7 @@ static const llc_conn_action_t llc_reject_actions_5b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_5b = { +static const struct llc_conn_state_trans llc_reject_state_trans_5b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2291,7 +2291,7 @@ static const llc_conn_action_t llc_reject_actions_5c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_5c = { +static const struct llc_conn_state_trans llc_reject_state_trans_5c = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_5c, @@ -2305,7 +2305,7 @@ static const llc_conn_action_t llc_reject_actions_6[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_6 = { +static const struct llc_conn_state_trans llc_reject_state_trans_6 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2330,7 +2330,7 @@ static const llc_conn_action_t llc_reject_actions_7a[] = { }; -static struct llc_conn_state_trans llc_reject_state_trans_7a = { +static const struct llc_conn_state_trans llc_reject_state_trans_7a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_7a, @@ -2354,7 +2354,7 @@ static const llc_conn_action_t llc_reject_actions_7b[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_7b = { +static const struct llc_conn_state_trans llc_reject_state_trans_7b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_7b, @@ -2376,7 +2376,7 @@ static const llc_conn_action_t llc_reject_actions_8a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_8a = { +static const struct llc_conn_state_trans llc_reject_state_trans_8a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_8a, @@ -2398,7 +2398,7 @@ static const llc_conn_action_t llc_reject_actions_8b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_8b = { +static const struct llc_conn_state_trans llc_reject_state_trans_8b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_reject_ev_qfyrs_8b, @@ -2415,7 +2415,7 @@ static const llc_conn_action_t llc_reject_actions_9[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_9 = { +static const struct llc_conn_state_trans llc_reject_state_trans_9 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -2430,7 +2430,7 @@ static const llc_conn_action_t llc_reject_actions_10a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_10a = { +static const struct llc_conn_state_trans llc_reject_state_trans_10a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2445,7 +2445,7 @@ static const llc_conn_action_t llc_reject_actions_10b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_10b = { +static const struct llc_conn_state_trans llc_reject_state_trans_10b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2465,7 +2465,7 @@ static const llc_conn_action_t llc_reject_actions_10c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_10c = { +static const struct llc_conn_state_trans llc_reject_state_trans_10c = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_10c, @@ -2480,7 +2480,7 @@ static const llc_conn_action_t llc_reject_actions_11[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_11 = { +static const struct llc_conn_state_trans llc_reject_state_trans_11 = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2495,7 +2495,7 @@ static const llc_conn_action_t llc_reject_actions_12a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_12a = { +static const struct llc_conn_state_trans llc_reject_state_trans_12a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2510,7 +2510,7 @@ static const llc_conn_action_t llc_reject_actions_12b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_12b = { +static const struct llc_conn_state_trans llc_reject_state_trans_12b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2530,7 +2530,7 @@ static const llc_conn_action_t llc_reject_actions_12c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_12c = { +static const struct llc_conn_state_trans llc_reject_state_trans_12c = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_12c, @@ -2545,7 +2545,7 @@ static const llc_conn_action_t llc_reject_actions_13[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_13 = { +static const struct llc_conn_state_trans llc_reject_state_trans_13 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2567,7 +2567,7 @@ static const llc_conn_action_t llc_reject_actions_14a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_14a = { +static const struct llc_conn_state_trans llc_reject_state_trans_14a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_14a, @@ -2589,7 +2589,7 @@ static const llc_conn_action_t llc_reject_actions_14b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_14b = { +static const struct llc_conn_state_trans llc_reject_state_trans_14b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_14b, @@ -2610,7 +2610,7 @@ static const llc_conn_action_t llc_reject_actions_15a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_15a = { +static const struct llc_conn_state_trans llc_reject_state_trans_15a = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_15a, @@ -2631,7 +2631,7 @@ static const llc_conn_action_t llc_reject_actions_15b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_15b = { +static const struct llc_conn_state_trans llc_reject_state_trans_15b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_15b, @@ -2647,7 +2647,7 @@ static const llc_conn_action_t llc_reject_actions_16[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_16 = { +static const struct llc_conn_state_trans llc_reject_state_trans_16 = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2666,7 +2666,7 @@ static const llc_conn_action_t llc_reject_actions_17[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_17 = { +static const struct llc_conn_state_trans llc_reject_state_trans_17 = { .ev = llc_conn_ev_init_p_f_cycle, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_17, @@ -2688,7 +2688,7 @@ static const llc_conn_action_t llc_reject_actions_18[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_18 = { +static const struct llc_conn_state_trans llc_reject_state_trans_18 = { .ev = llc_conn_ev_rej_tmr_exp, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_18, @@ -2710,7 +2710,7 @@ static const llc_conn_action_t llc_reject_actions_19[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_19 = { +static const struct llc_conn_state_trans llc_reject_state_trans_19 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_19, @@ -2733,7 +2733,7 @@ static const llc_conn_action_t llc_reject_actions_20a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_20a = { +static const struct llc_conn_state_trans llc_reject_state_trans_20a = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_20a, @@ -2756,7 +2756,7 @@ static const llc_conn_action_t llc_reject_actions_20b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_reject_state_trans_20b = { +static const struct llc_conn_state_trans llc_reject_state_trans_20b = { .ev = llc_conn_ev_busy_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_reject_ev_qfyrs_20b, @@ -2767,7 +2767,7 @@ static struct llc_conn_state_trans llc_reject_state_trans_20b = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_reject_state_transitions[] = { +static const struct llc_conn_state_trans *llc_reject_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_common_state_trans_end, @@ -2834,7 +2834,7 @@ static const llc_conn_ev_qfyr_t llc_await_ev_qfyrs_1_0[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_await_actions_1_0[1]; -static struct llc_conn_state_trans llc_await_state_trans_1_0 = { +static const struct llc_conn_state_trans llc_await_state_trans_1_0 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_await_ev_qfyrs_1_0, @@ -2848,7 +2848,7 @@ static const llc_conn_action_t llc_await_actions_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_1 = { +static const struct llc_conn_state_trans llc_await_state_trans_1 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -2867,7 +2867,7 @@ static const llc_conn_action_t llc_await_actions_2[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_2 = { +static const struct llc_conn_state_trans llc_await_state_trans_2 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -2883,7 +2883,7 @@ static const llc_conn_action_t llc_await_actions_3a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_3a = { +static const struct llc_conn_state_trans llc_await_state_trans_3a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -2899,7 +2899,7 @@ static const llc_conn_action_t llc_await_actions_3b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_3b = { +static const struct llc_conn_state_trans llc_await_state_trans_3b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -2916,7 +2916,7 @@ static const llc_conn_action_t llc_await_actions_4[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_4 = { +static const struct llc_conn_state_trans llc_await_state_trans_4 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -2935,7 +2935,7 @@ static const llc_conn_action_t llc_await_actions_5[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_5 = { +static const struct llc_conn_state_trans llc_await_state_trans_5 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -2952,7 +2952,7 @@ static const llc_conn_action_t llc_await_actions_6a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_6a = { +static const struct llc_conn_state_trans llc_await_state_trans_6a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -2969,7 +2969,7 @@ static const llc_conn_action_t llc_await_actions_6b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_6b = { +static const struct llc_conn_state_trans llc_await_state_trans_6b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -2986,7 +2986,7 @@ static const llc_conn_action_t llc_await_actions_7[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_7 = { +static const struct llc_conn_state_trans llc_await_state_trans_7 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3003,7 +3003,7 @@ static const llc_conn_action_t llc_await_actions_8a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_8a = { +static const struct llc_conn_state_trans llc_await_state_trans_8a = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -3020,7 +3020,7 @@ static const llc_conn_action_t llc_await_actions_8b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_8b = { +static const struct llc_conn_state_trans llc_await_state_trans_8b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -3035,7 +3035,7 @@ static const llc_conn_action_t llc_await_actions_9a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9a = { +static const struct llc_conn_state_trans llc_await_state_trans_9a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3050,7 +3050,7 @@ static const llc_conn_action_t llc_await_actions_9b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9b = { +static const struct llc_conn_state_trans llc_await_state_trans_9b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3065,7 +3065,7 @@ static const llc_conn_action_t llc_await_actions_9c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9c = { +static const struct llc_conn_state_trans llc_await_state_trans_9c = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3080,7 +3080,7 @@ static const llc_conn_action_t llc_await_actions_9d[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_9d = { +static const struct llc_conn_state_trans llc_await_state_trans_9d = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3096,7 +3096,7 @@ static const llc_conn_action_t llc_await_actions_10a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_10a = { +static const struct llc_conn_state_trans llc_await_state_trans_10a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3112,7 +3112,7 @@ static const llc_conn_action_t llc_await_actions_10b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_10b = { +static const struct llc_conn_state_trans llc_await_state_trans_10b = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3128,7 +3128,7 @@ static const llc_conn_action_t llc_await_actions_11[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_11 = { +static const struct llc_conn_state_trans llc_await_state_trans_11 = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -3143,7 +3143,7 @@ static const llc_conn_action_t llc_await_actions_12a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_12a = { +static const struct llc_conn_state_trans llc_await_state_trans_12a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3158,7 +3158,7 @@ static const llc_conn_action_t llc_await_actions_12b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_12b = { +static const struct llc_conn_state_trans llc_await_state_trans_12b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3174,7 +3174,7 @@ static const llc_conn_action_t llc_await_actions_13[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_13 = { +static const struct llc_conn_state_trans llc_await_state_trans_13 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3194,7 +3194,7 @@ static const llc_conn_action_t llc_await_actions_14[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_state_trans_14 = { +static const struct llc_conn_state_trans llc_await_state_trans_14 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_await_ev_qfyrs_14, @@ -3205,7 +3205,7 @@ static struct llc_conn_state_trans llc_await_state_trans_14 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_await_state_transitions[] = { +static const struct llc_conn_state_trans *llc_await_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_await_state_trans_1_0, @@ -3263,7 +3263,7 @@ static const llc_conn_ev_qfyr_t llc_await_busy_ev_qfyrs_1_0[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_await_busy_actions_1_0[1]; -static struct llc_conn_state_trans llc_await_busy_state_trans_1_0 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_1_0 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_await_busy_ev_qfyrs_1_0, @@ -3282,7 +3282,7 @@ static const llc_conn_action_t llc_await_busy_actions_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_1 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_1 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_busy_ev_qfyrs_1, @@ -3300,7 +3300,7 @@ static const llc_conn_action_t llc_await_busy_actions_2[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_2 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_2 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = llc_await_busy_ev_qfyrs_2, @@ -3318,7 +3318,7 @@ static const llc_conn_action_t llc_await_busy_actions_3[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_3 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_3 = { .ev = llc_conn_ev_local_busy_cleared, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_busy_ev_qfyrs_3, @@ -3337,7 +3337,7 @@ static const llc_conn_action_t llc_await_busy_actions_4[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_4 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_4 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3353,7 +3353,7 @@ static const llc_conn_action_t llc_await_busy_actions_5a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_5a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_5a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3369,7 +3369,7 @@ static const llc_conn_action_t llc_await_busy_actions_5b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_5b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_5b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3385,7 +3385,7 @@ static const llc_conn_action_t llc_await_busy_actions_6[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_6 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_6 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3406,7 +3406,7 @@ static const llc_conn_action_t llc_await_busy_actions_7[] = { [9] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_7 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_7 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3424,7 +3424,7 @@ static const llc_conn_action_t llc_await_busy_actions_8a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_8a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_8a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3442,7 +3442,7 @@ static const llc_conn_action_t llc_await_busy_actions_8b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_8b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_8b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3460,7 +3460,7 @@ static const llc_conn_action_t llc_await_busy_actions_9[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_9 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_9 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3477,7 +3477,7 @@ static const llc_conn_action_t llc_await_busy_actions_10a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_10a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_10a = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3494,7 +3494,7 @@ static const llc_conn_action_t llc_await_busy_actions_10b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_10b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_10b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3509,7 +3509,7 @@ static const llc_conn_action_t llc_await_busy_actions_11a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3524,7 +3524,7 @@ static const llc_conn_action_t llc_await_busy_actions_11b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3539,7 +3539,7 @@ static const llc_conn_action_t llc_await_busy_actions_11c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11c = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11c = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3554,7 +3554,7 @@ static const llc_conn_action_t llc_await_busy_actions_11d[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_11d = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_11d = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3570,7 +3570,7 @@ static const llc_conn_action_t llc_await_busy_actions_12a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_12a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_12a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3586,7 +3586,7 @@ static const llc_conn_action_t llc_await_busy_actions_12b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_12b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_12b = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3602,7 +3602,7 @@ static const llc_conn_action_t llc_await_busy_actions_13[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_13 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_13 = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_BUSY, .ev_qualifiers = NONE, @@ -3617,7 +3617,7 @@ static const llc_conn_action_t llc_await_busy_actions_14a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_14a = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_14a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3632,7 +3632,7 @@ static const llc_conn_action_t llc_await_busy_actions_14b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_14b = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_14b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3648,7 +3648,7 @@ static const llc_conn_action_t llc_await_busy_actions_15[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_15 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_15 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3668,7 +3668,7 @@ static const llc_conn_action_t llc_await_busy_actions_16[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_busy_state_trans_16 = { +static const struct llc_conn_state_trans llc_await_busy_state_trans_16 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = llc_await_busy_ev_qfyrs_16, @@ -3679,7 +3679,7 @@ static struct llc_conn_state_trans llc_await_busy_state_trans_16 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_await_busy_state_transitions[] = { +static const struct llc_conn_state_trans *llc_await_busy_state_transitions[] = { [0] = &llc_common_state_trans_1, /* Request */ [1] = &llc_common_state_trans_2, [2] = &llc_await_busy_state_trans_1_0, @@ -3739,7 +3739,7 @@ static const llc_conn_ev_qfyr_t llc_await_reject_ev_qfyrs_1_0[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_await_reject_actions_1_0[1]; -static struct llc_conn_state_trans llc_await_reject_state_trans_1_0 = { +static const struct llc_conn_state_trans llc_await_reject_state_trans_1_0 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_reject_ev_qfyrs_1_0, @@ -3753,7 +3753,7 @@ static const llc_conn_action_t llc_await_rejct_actions_1[] = { [2] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_1 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_1 = { .ev = llc_conn_ev_local_busy_detected, .next_state = LLC_CONN_STATE_AWAIT_BUSY, .ev_qualifiers = NONE, @@ -3767,7 +3767,7 @@ static const llc_conn_action_t llc_await_rejct_actions_2a[] = { [2] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_2a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_2a = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3781,7 +3781,7 @@ static const llc_conn_action_t llc_await_rejct_actions_2b[] = { [2] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_2b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_2b = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3796,7 +3796,7 @@ static const llc_conn_action_t llc_await_rejct_actions_3[] = { [3] = NULL }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_3 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_3 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3816,7 +3816,7 @@ static const llc_conn_action_t llc_await_rejct_actions_4[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_4 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_4 = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -3834,7 +3834,7 @@ static const llc_conn_action_t llc_await_rejct_actions_5a[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_5a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_5a = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3852,7 +3852,7 @@ static const llc_conn_action_t llc_await_rejct_actions_5b[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_5b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_5b = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3870,7 +3870,7 @@ static const llc_conn_action_t llc_await_rejct_actions_6[] = { [6] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_6 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_6 = { .ev = llc_conn_ev_rx_i_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT, .ev_qualifiers = NONE, @@ -3887,7 +3887,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7a[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_7a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_7a = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -3904,7 +3904,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7b[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_7b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_7b = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -3921,7 +3921,7 @@ static const llc_conn_action_t llc_await_rejct_actions_7c[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_7c = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_7c = { .ev = llc_conn_ev_rx_i_rsp_fbit_set_1_unexpd_ns, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -3936,7 +3936,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3951,7 +3951,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8b = { .ev = llc_conn_ev_rx_rr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3966,7 +3966,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8c[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8c = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8c = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3981,7 +3981,7 @@ static const llc_conn_action_t llc_await_rejct_actions_8d[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_8d = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_8d = { .ev = llc_conn_ev_rx_rej_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -3997,7 +3997,7 @@ static const llc_conn_action_t llc_await_rejct_actions_9a[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_9a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_9a = { .ev = llc_conn_ev_rx_rr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4013,7 +4013,7 @@ static const llc_conn_action_t llc_await_rejct_actions_9b[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_9b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_9b = { .ev = llc_conn_ev_rx_rej_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4029,7 +4029,7 @@ static const llc_conn_action_t llc_await_rejct_actions_10[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_10 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_10 = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_1, .next_state = LLC_CONN_STATE_REJ, .ev_qualifiers = NONE, @@ -4044,7 +4044,7 @@ static const llc_conn_action_t llc_await_rejct_actions_11a[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_11a = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_11a = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4059,7 +4059,7 @@ static const llc_conn_action_t llc_await_rejct_actions_11b[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_11b = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_11b = { .ev = llc_conn_ev_rx_rnr_rsp_fbit_set_0, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4075,7 +4075,7 @@ static const llc_conn_action_t llc_await_rejct_actions_12[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_12 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_12 = { .ev = llc_conn_ev_rx_rnr_cmd_pbit_set_1, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = NONE, @@ -4095,7 +4095,7 @@ static const llc_conn_action_t llc_await_rejct_actions_13[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_await_rejct_state_trans_13 = { +static const struct llc_conn_state_trans llc_await_rejct_state_trans_13 = { .ev = llc_conn_ev_p_tmr_exp, .next_state = LLC_CONN_STATE_AWAIT_REJ, .ev_qualifiers = llc_await_rejct_ev_qfyrs_13, @@ -4106,7 +4106,7 @@ static struct llc_conn_state_trans llc_await_rejct_state_trans_13 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_await_rejct_state_transitions[] = { +static const struct llc_conn_state_trans *llc_await_rejct_state_transitions[] = { [0] = &llc_await_reject_state_trans_1_0, [1] = &llc_common_state_trans_1, /* requests */ [2] = &llc_common_state_trans_2, @@ -4171,7 +4171,7 @@ static const llc_conn_action_t llc_d_conn_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_1, @@ -4194,7 +4194,7 @@ static const llc_conn_action_t llc_d_conn_actions_1_1[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_1_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_1_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_1_1, @@ -4218,7 +4218,7 @@ static const llc_conn_action_t llc_d_conn_actions_2[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_2 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_2 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_2, @@ -4241,7 +4241,7 @@ static const llc_conn_action_t llc_d_conn_actions_2_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_2_1 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_2_1, @@ -4254,7 +4254,7 @@ static const llc_conn_action_t llc_d_conn_actions_3[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_3 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_3 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = NONE, @@ -4277,7 +4277,7 @@ static const llc_conn_action_t llc_d_conn_actions_4[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_4 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_4 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_4, @@ -4299,7 +4299,7 @@ static const llc_conn_action_t llc_d_conn_actions_4_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_4_1 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_4_1 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_4_1, @@ -4318,7 +4318,7 @@ static const llc_conn_ev_qfyr_t llc_d_conn_ev_qfyrs_5[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_d_conn_actions_5[1]; -static struct llc_conn_state_trans llc_d_conn_state_trans_5 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_5 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = llc_d_conn_ev_qfyrs_5, @@ -4338,7 +4338,7 @@ static const llc_conn_action_t llc_d_conn_actions_6[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_6 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_6 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_D_CONN, .ev_qualifiers = llc_d_conn_ev_qfyrs_6, @@ -4359,7 +4359,7 @@ static const llc_conn_action_t llc_d_conn_actions_7[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_7 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_7, @@ -4379,7 +4379,7 @@ static const llc_conn_action_t llc_d_conn_actions_8[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_d_conn_state_trans_8 = { +static const struct llc_conn_state_trans llc_d_conn_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_d_conn_ev_qfyrs_8, @@ -4390,7 +4390,7 @@ static struct llc_conn_state_trans llc_d_conn_state_trans_8 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_d_conn_state_transitions[] = { +static const struct llc_conn_state_trans *llc_d_conn_state_transitions[] = { [0] = &llc_d_conn_state_trans_5, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* Local busy */ @@ -4419,7 +4419,7 @@ static const llc_conn_action_t llc_rst_actions_1[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -4447,7 +4447,7 @@ static const llc_conn_action_t llc_rst_actions_2[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_2 = { +static const struct llc_conn_state_trans llc_rst_state_trans_2 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_rst_ev_qfyrs_2, @@ -4475,7 +4475,7 @@ static const llc_conn_action_t llc_rst_actions_2_1[] = { [7] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_2_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_2_1 = { .ev = llc_conn_ev_rx_ua_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_rst_ev_qfyrs_2_1, @@ -4495,7 +4495,7 @@ static const llc_conn_action_t llc_rst_actions_3[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_3 = { +static const struct llc_conn_state_trans llc_rst_state_trans_3 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = llc_rst_ev_qfyrs_3, @@ -4518,7 +4518,7 @@ static const llc_conn_action_t llc_rst_actions_4[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_4 = { +static const struct llc_conn_state_trans llc_rst_state_trans_4 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_4, @@ -4541,7 +4541,7 @@ static const llc_conn_action_t llc_rst_actions_4_1[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_4_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_4_1 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_4_1, @@ -4564,7 +4564,7 @@ static const llc_conn_action_t llc_rst_actions_5[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_5 = { +static const struct llc_conn_state_trans llc_rst_state_trans_5 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_5, @@ -4586,7 +4586,7 @@ static const llc_conn_action_t llc_rst_actions_5_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_5_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_5_1 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_5_1, @@ -4602,7 +4602,7 @@ static const llc_conn_ev_qfyr_t llc_rst_ev_qfyrs_6[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_rst_actions_6[1]; -static struct llc_conn_state_trans llc_rst_state_trans_6 = { +static const struct llc_conn_state_trans llc_rst_state_trans_6 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_rst_ev_qfyrs_6, @@ -4623,7 +4623,7 @@ static const llc_conn_action_t llc_rst_actions_7[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_7 = { +static const struct llc_conn_state_trans llc_rst_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_rst_ev_qfyrs_7, @@ -4644,7 +4644,7 @@ static const llc_conn_action_t llc_rst_actions_8[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_8 = { +static const struct llc_conn_state_trans llc_rst_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_8, @@ -4665,7 +4665,7 @@ static const llc_conn_action_t llc_rst_actions_8_1[] = { [2] = NULL, }; -static struct llc_conn_state_trans llc_rst_state_trans_8_1 = { +static const struct llc_conn_state_trans llc_rst_state_trans_8_1 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = llc_rst_ev_qfyrs_8_1, @@ -4676,7 +4676,7 @@ static struct llc_conn_state_trans llc_rst_state_trans_8_1 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_rst_state_transitions[] = { +static const struct llc_conn_state_trans *llc_rst_state_transitions[] = { [0] = &llc_rst_state_trans_6, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* Local busy */ @@ -4710,7 +4710,7 @@ static const llc_conn_action_t llc_error_actions_1[] = { [8] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_1 = { +static const struct llc_conn_state_trans llc_error_state_trans_1 = { .ev = llc_conn_ev_rx_sabme_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_NORMAL, .ev_qualifiers = NONE, @@ -4726,7 +4726,7 @@ static const llc_conn_action_t llc_error_actions_2[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_2 = { +static const struct llc_conn_state_trans llc_error_state_trans_2 = { .ev = llc_conn_ev_rx_disc_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -4741,7 +4741,7 @@ static const llc_conn_action_t llc_error_actions_3[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_3 = { +static const struct llc_conn_state_trans llc_error_state_trans_3 = { .ev = llc_conn_ev_rx_dm_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -4757,7 +4757,7 @@ static const llc_conn_action_t llc_error_actions_4[] = { [4] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_4 = { +static const struct llc_conn_state_trans llc_error_state_trans_4 = { .ev = llc_conn_ev_rx_frmr_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = NONE, @@ -4770,7 +4770,7 @@ static const llc_conn_action_t llc_error_actions_5[] = { [1] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_5 = { +static const struct llc_conn_state_trans llc_error_state_trans_5 = { .ev = llc_conn_ev_rx_xxx_cmd_pbit_set_x, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -4778,7 +4778,7 @@ static struct llc_conn_state_trans llc_error_state_trans_5 = { }; /* State transitions for LLC_CONN_EV_RX_XXX_RSP_Fbit_SET_X event */ -static struct llc_conn_state_trans llc_error_state_trans_6 = { +static const struct llc_conn_state_trans llc_error_state_trans_6 = { .ev = llc_conn_ev_rx_xxx_rsp_fbit_set_x, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = NONE, @@ -4798,7 +4798,7 @@ static const llc_conn_action_t llc_error_actions_7[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_7 = { +static const struct llc_conn_state_trans llc_error_state_trans_7 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = llc_error_ev_qfyrs_7, @@ -4820,7 +4820,7 @@ static const llc_conn_action_t llc_error_actions_8[] = { [5] = NULL, }; -static struct llc_conn_state_trans llc_error_state_trans_8 = { +static const struct llc_conn_state_trans llc_error_state_trans_8 = { .ev = llc_conn_ev_ack_tmr_exp, .next_state = LLC_CONN_STATE_RESET, .ev_qualifiers = llc_error_ev_qfyrs_8, @@ -4836,7 +4836,7 @@ static const llc_conn_ev_qfyr_t llc_error_ev_qfyrs_9[] = { /* just one member, NULL, .bss zeroes it */ static const llc_conn_action_t llc_error_actions_9[1]; -static struct llc_conn_state_trans llc_error_state_trans_9 = { +static const struct llc_conn_state_trans llc_error_state_trans_9 = { .ev = llc_conn_ev_data_req, .next_state = LLC_CONN_STATE_ERROR, .ev_qualifiers = llc_error_ev_qfyrs_9, @@ -4847,7 +4847,7 @@ static struct llc_conn_state_trans llc_error_state_trans_9 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_error_state_transitions[] = { +static const struct llc_conn_state_trans *llc_error_state_transitions[] = { [0] = &llc_error_state_trans_9, /* Request */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* Local busy */ @@ -4873,7 +4873,7 @@ static const llc_conn_action_t llc_temp_actions_1[] = { [3] = NULL, }; -static struct llc_conn_state_trans llc_temp_state_trans_1 = { +static const struct llc_conn_state_trans llc_temp_state_trans_1 = { .ev = llc_conn_ev_disc_req, .next_state = LLC_CONN_STATE_ADM, .ev_qualifiers = NONE, @@ -4884,7 +4884,7 @@ static struct llc_conn_state_trans llc_temp_state_trans_1 = { * Array of pointers; * one to each transition */ -static struct llc_conn_state_trans *llc_temp_state_transitions[] = { +static const struct llc_conn_state_trans *llc_temp_state_transitions[] = { [0] = &llc_temp_state_trans_1, /* requests */ [1] = &llc_common_state_trans_end, [2] = &llc_common_state_trans_end, /* local busy */ diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 0a3f5e0bec00..afc6974eafda 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -34,10 +34,10 @@ static int llc_find_offset(int state, int ev_type); static void llc_conn_send_pdus(struct sock *sk); static int llc_conn_service(struct sock *sk, struct sk_buff *skb); static int llc_exec_conn_trans_actions(struct sock *sk, - struct llc_conn_state_trans *trans, + const struct llc_conn_state_trans *trans, struct sk_buff *ev); -static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, - struct sk_buff *skb); +static const struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, + struct sk_buff *skb); /* Offset table on connection states transition diagram */ static int llc_offset_table[NBR_CONN_STATES][NBR_CONN_EV]; @@ -356,9 +356,9 @@ static void llc_conn_send_pdus(struct sock *sk) */ static int llc_conn_service(struct sock *sk, struct sk_buff *skb) { - int rc = 1; + const struct llc_conn_state_trans *trans; struct llc_sock *llc = llc_sk(sk); - struct llc_conn_state_trans *trans; + int rc = 1; if (llc->state > NBR_CONN_STATES) goto out; @@ -384,10 +384,10 @@ out: * This function finds transition that matches with happened event. * Returns pointer to found transition on success, %NULL otherwise. */ -static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, - struct sk_buff *skb) +static const struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, + struct sk_buff *skb) { - struct llc_conn_state_trans **next_trans; + const struct llc_conn_state_trans **next_trans; const llc_conn_ev_qfyr_t *next_qualifier; struct llc_conn_state_ev *ev = llc_conn_ev(skb); struct llc_sock *llc = llc_sk(sk); @@ -432,7 +432,7 @@ static struct llc_conn_state_trans *llc_qualify_conn_ev(struct sock *sk, * success, 1 to indicate failure of at least one action. */ static int llc_exec_conn_trans_actions(struct sock *sk, - struct llc_conn_state_trans *trans, + const struct llc_conn_state_trans *trans, struct sk_buff *skb) { int rc = 0; @@ -635,8 +635,8 @@ u8 llc_data_accept_state(u8 state) */ static u16 __init llc_find_next_offset(struct llc_conn_state *state, u16 offset) { + const struct llc_conn_state_trans **next_trans; u16 cnt = 0; - struct llc_conn_state_trans **next_trans; for (next_trans = state->transitions + offset; (*next_trans)->ev; next_trans++) -- cgit v1.2.3 From 0970bf676f86c2c4d9bf7e672f5504d390c9fce6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Jul 2024 18:15:20 +0200 Subject: llc: Constify struct llc_sap_state_trans 'struct llc_sap_state_trans' are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 339 456 24 819 333 net/llc/llc_s_st.o After: ===== text data bss dec hex filename 683 144 0 827 33b net/llc/llc_s_st.o Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/9d17587639195ee94b74ff06a11ef97d1833ee52.1720973710.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/net/llc_s_st.h | 4 ++-- net/llc/llc_s_st.c | 26 +++++++++++++------------- net/llc/llc_sap.c | 12 ++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h index ed5b2fa40d32..fca49d483d20 100644 --- a/include/net/llc_s_st.h +++ b/include/net/llc_s_st.h @@ -29,8 +29,8 @@ struct llc_sap_state_trans { }; struct llc_sap_state { - u8 curr_state; - struct llc_sap_state_trans **transitions; + u8 curr_state; + const struct llc_sap_state_trans **transitions; }; /* only access to SAP state table */ diff --git a/net/llc/llc_s_st.c b/net/llc/llc_s_st.c index 308c616883a4..acccc827c562 100644 --- a/net/llc/llc_s_st.c +++ b/net/llc/llc_s_st.c @@ -24,7 +24,7 @@ * last entry for this state * all members are zeros, .bss zeroes it */ -static struct llc_sap_state_trans llc_sap_state_trans_end; +static const struct llc_sap_state_trans llc_sap_state_trans_end; /* state LLC_SAP_STATE_INACTIVE transition for * LLC_SAP_EV_ACTIVATION_REQ event @@ -34,14 +34,14 @@ static const llc_sap_action_t llc_sap_inactive_state_actions_1[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_inactive_state_trans_1 = { +static const struct llc_sap_state_trans llc_sap_inactive_state_trans_1 = { .ev = llc_sap_ev_activation_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_inactive_state_actions_1, }; /* array of pointers; one to each transition */ -static struct llc_sap_state_trans *llc_sap_inactive_state_transitions[] = { +static const struct llc_sap_state_trans *llc_sap_inactive_state_transitions[] = { [0] = &llc_sap_inactive_state_trans_1, [1] = &llc_sap_state_trans_end, }; @@ -52,7 +52,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_1[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_1 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_1 = { .ev = llc_sap_ev_rx_ui, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_1, @@ -64,7 +64,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_2[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_2 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_2 = { .ev = llc_sap_ev_unitdata_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_2, @@ -76,7 +76,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_3[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_3 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_3 = { .ev = llc_sap_ev_xid_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_3, @@ -88,7 +88,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_4[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_4 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_4 = { .ev = llc_sap_ev_rx_xid_c, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_4, @@ -100,7 +100,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_5[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_5 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_5 = { .ev = llc_sap_ev_rx_xid_r, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_5, @@ -112,7 +112,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_6[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_6 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_6 = { .ev = llc_sap_ev_test_req, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_6, @@ -124,7 +124,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_7[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_7 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_7 = { .ev = llc_sap_ev_rx_test_c, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_7 @@ -136,7 +136,7 @@ static const llc_sap_action_t llc_sap_active_state_actions_8[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_8 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_8 = { .ev = llc_sap_ev_rx_test_r, .next_state = LLC_SAP_STATE_ACTIVE, .ev_actions = llc_sap_active_state_actions_8, @@ -150,14 +150,14 @@ static const llc_sap_action_t llc_sap_active_state_actions_9[] = { [1] = NULL, }; -static struct llc_sap_state_trans llc_sap_active_state_trans_9 = { +static const struct llc_sap_state_trans llc_sap_active_state_trans_9 = { .ev = llc_sap_ev_deactivation_req, .next_state = LLC_SAP_STATE_INACTIVE, .ev_actions = llc_sap_active_state_actions_9 }; /* array of pointers; one to each transition */ -static struct llc_sap_state_trans *llc_sap_active_state_transitions[] = { +static const struct llc_sap_state_trans *llc_sap_active_state_transitions[] = { [0] = &llc_sap_active_state_trans_2, [1] = &llc_sap_active_state_trans_1, [2] = &llc_sap_active_state_trans_3, diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 116c0e479183..6cd03c2ae7d5 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -114,12 +114,12 @@ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) * Returns the pointer to found transition on success or %NULL for * failure. */ -static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, - struct sk_buff *skb) +static const struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, + struct sk_buff *skb) { int i = 0; - struct llc_sap_state_trans *rc = NULL; - struct llc_sap_state_trans **next_trans; + const struct llc_sap_state_trans *rc = NULL; + const struct llc_sap_state_trans **next_trans; struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1]; /* * Search thru events for this state until list exhausted or until @@ -143,7 +143,7 @@ static struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, * Returns 0 for success and 1 for failure of at least one action. */ static int llc_exec_sap_trans_actions(struct llc_sap *sap, - struct llc_sap_state_trans *trans, + const struct llc_sap_state_trans *trans, struct sk_buff *skb) { int rc = 0; @@ -166,8 +166,8 @@ static int llc_exec_sap_trans_actions(struct llc_sap *sap, */ static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb) { + const struct llc_sap_state_trans *trans; int rc = 1; - struct llc_sap_state_trans *trans; if (sap->state > LLC_NR_SAP_STATES) goto out; -- cgit v1.2.3 From 6e5c85c003e47fd49b72ca052b9181644e04a520 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:18:58 +0000 Subject: net/sched: flower: refactor control flag definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redefine the flower control flags as an enum, so they are included in BTF info. Make the kernel-side enum a more explicit superset of TCA_FLOWER_KEY_FLAGS_*, new flags still need to be added to both enums, but at least the bit position only has to be defined once. FLOW_DIS_ENCAPSULATION is never set for mask, so it can't be exposed to userspace in an unsupported flags mask error message, so it will be placed one bit position above the last uAPI flag. Suggested-by: Alexander Lobakin Suggested-by: Jakub Kicinski Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_dissector.h | 14 +++++++++++--- include/uapi/linux/pkt_cls.h | 3 +++ 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 3e47e123934d..c3fce070b912 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -7,6 +7,7 @@ #include #include #include +#include struct bpf_prog; struct net; @@ -24,9 +25,16 @@ struct flow_dissector_key_control { u32 flags; }; -#define FLOW_DIS_IS_FRAGMENT BIT(0) -#define FLOW_DIS_FIRST_FRAG BIT(1) -#define FLOW_DIS_ENCAPSULATION BIT(2) +/* The control flags are kept in sync with TCA_FLOWER_KEY_FLAGS_*, as those + * flags are exposed to userspace in some error paths, ie. unsupported flags. + */ +enum flow_dissector_ctrl_flags { + FLOW_DIS_IS_FRAGMENT = TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, + FLOW_DIS_FIRST_FRAG = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + + /* These flags are internal to the kernel */ + FLOW_DIS_ENCAPSULATION = (TCA_FLOWER_KEY_FLAGS_MAX << 1), +}; enum flow_dissect_ret { FLOW_DISSECT_RET_OUT_GOOD, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index b6d38f5fd7c0..12db276f0c11 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -677,8 +677,11 @@ enum { enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), + __TCA_FLOWER_KEY_FLAGS_MAX, }; +#define TCA_FLOWER_KEY_FLAGS_MAX (__TCA_FLOWER_KEY_FLAGS_MAX - 1) + enum { TCA_FLOWER_KEY_CFM_OPT_UNSPEC, TCA_FLOWER_KEY_CFM_MD_LEVEL, -- cgit v1.2.3 From bfda5a63137bc83c344c4d995f404c8e701ff0fa Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:00 +0000 Subject: net/sched: flower: define new tunnel flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define new TCA_FLOWER_KEY_FLAGS_* flags for use in struct flow_dissector_key_control, covering the same flags as currently exposed through TCA_FLOWER_KEY_ENC_FLAGS. Put the new flags under FLOW_DIS_F_*. The idea is that we can later, move the existing flags under FLOW_DIS_F_* as well. The ynl flag names have been taken from the RFC iproute2 patch. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20240713021911.1631517-4-ast@fiberby.net Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/tc.yaml | 4 ++++ include/net/flow_dissector.h | 7 ++++++- include/uapi/linux/pkt_cls.h | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index fbbc928647fa..aa574e3827ab 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -47,6 +47,10 @@ definitions: entries: - frag - firstfrag + - tuncsum + - tundf + - tunoam + - tuncrit - name: tc-stats type: struct diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index c3fce070b912..460ea65b9e59 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -17,7 +17,8 @@ struct sk_buff; * struct flow_dissector_key_control: * @thoff: Transport header offset * @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_* - * @flags: Key flags. Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAGENCAPSULATION) + * @flags: Key flags. + * Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION|F_*) */ struct flow_dissector_key_control { u16 thoff; @@ -31,6 +32,10 @@ struct flow_dissector_key_control { enum flow_dissector_ctrl_flags { FLOW_DIS_IS_FRAGMENT = TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_FIRST_FRAG = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_F_TUNNEL_CSUM = TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM, + FLOW_DIS_F_TUNNEL_DONT_FRAGMENT = TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT, + FLOW_DIS_F_TUNNEL_OAM = TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, + FLOW_DIS_F_TUNNEL_CRIT_OPT = TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT, /* These flags are internal to the kernel */ FLOW_DIS_ENCAPSULATION = (TCA_FLOWER_KEY_FLAGS_MAX << 1), diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 12db276f0c11..3dc4388e944c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -677,6 +677,10 @@ enum { enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), + TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM = (1 << 2), + TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT = (1 << 3), + TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM = (1 << 4), + TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT = (1 << 5), __TCA_FLOWER_KEY_FLAGS_MAX, }; -- cgit v1.2.3 From 11036bd7a0b3b05c5e1f43d107ddb02abf83adb9 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:06 +0000 Subject: net/sched: cls_flower: rework TCA_FLOWER_KEY_ENC_FLAGS usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes how TCA_FLOWER_KEY_ENC_FLAGS is used, so that it is used with TCA_FLOWER_KEY_FLAGS_* flags, in the same way as TCA_FLOWER_KEY_FLAGS is currently used. Where TCA_FLOWER_KEY_FLAGS uses {key,mask}->control.flags, then TCA_FLOWER_KEY_ENC_FLAGS now uses {key,mask}->enc_control.flags, therefore {key,mask}->enc_flags is now unused. As the generic fl_set_key_flags/fl_dump_key_flags() is used with encap set to true, then fl_{set,dump}_key_enc_flags() is removed. This breaks unreleased userspace API (net-next since 2024-06-04). Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-10-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 4 ++-- net/sched/cls_flower.c | 56 ++++++++++---------------------------------- 2 files changed, 15 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 3dc4388e944c..d36d9cdf0c00 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,8 +554,8 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ - TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ - TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* be32 */ __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2a440f11fe1f..e2239ab01355 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -45,11 +45,11 @@ (TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT | \ TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST) -#define TUNNEL_FLAGS_PRESENT (\ - _BITUL(IP_TUNNEL_CSUM_BIT) | \ - _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ - _BITUL(IP_TUNNEL_OAM_BIT) | \ - _BITUL(IP_TUNNEL_CRIT_OPT_BIT)) +#define TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK \ + (TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM | \ + TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT | \ + TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM | \ + TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT) struct fl_flow_key { struct flow_dissector_key_meta meta; @@ -745,10 +745,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, - [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_U32, - TUNNEL_FLAGS_PRESENT), - [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_U32, - TUNNEL_FLAGS_PRESENT), + [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_BE32, + TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK), + [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_BE32, + TCA_FLOWER_KEY_ENC_FLAGS_POLICY_MASK), }; static const struct nla_policy @@ -1866,21 +1866,6 @@ static int fl_set_key_cfm(struct nlattr **tb, return 0; } -static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key, - u32 *flags_mask, struct netlink_ext_ack *extack) -{ - /* mask is mandatory for flags */ - if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) { - NL_SET_ERR_MSG(extack, "missing enc_flags mask"); - return -EINVAL; - } - - *flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]); - *flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]); - - return 0; -} - static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -2123,8 +2108,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb, } if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) - ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags, - &mask->enc_flags.flags, extack); + ret = fl_set_key_flags(tb, true, &key->enc_control.flags, + &mask->enc_control.flags, extack); return ret; } @@ -3381,22 +3366,6 @@ err_cfm_opts: return err; } -static int fl_dump_key_enc_flags(struct sk_buff *skb, - struct flow_dissector_key_enc_flags *key, - struct flow_dissector_key_enc_flags *mask) -{ - if (!memchr_inv(mask, 0, sizeof(*mask))) - return 0; - - if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS, key->flags)) - return -EMSGSIZE; - - if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS_MASK, mask->flags)) - return -EMSGSIZE; - - return 0; -} - static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3699,7 +3668,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) goto nla_put_failure; - if (fl_dump_key_enc_flags(skb, &key->enc_flags, &mask->enc_flags)) + if (fl_dump_key_flags(skb, true, key->enc_control.flags, + mask->enc_control.flags)) goto nla_put_failure; return 0; -- cgit v1.2.3 From db5271d50ec155abf287a27fa84e2e33a81dbd55 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sat, 13 Jul 2024 02:19:08 +0000 Subject: flow_dissector: cleanup FLOW_DISSECTOR_KEY_ENC_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that TCA_FLOWER_KEY_ENC_FLAGS is unused, as it's former data is stored behind TCA_FLOWER_KEY_ENC_CONTROL, then remove the last bits of FLOW_DISSECTOR_KEY_ENC_FLAGS. FLOW_DISSECTOR_KEY_ENC_FLAGS is unreleased, and have been in net-next since 2024-06-04. Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-12-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_dissector.h | 9 --------- include/net/ip_tunnels.h | 12 ------------ net/core/flow_dissector.c | 16 +--------------- net/sched/cls_flower.c | 3 --- 4 files changed, 1 insertion(+), 39 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 460ea65b9e59..ced79dc8e856 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -342,14 +342,6 @@ struct flow_dissector_key_cfm { #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 -/** - * struct flow_dissector_key_enc_flags: tunnel metadata control flags - * @flags: tunnel control flags - */ -struct flow_dissector_key_enc_flags { - u32 flags; -}; - enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -384,7 +376,6 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ - FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 3877315cf8b8..1db2417b8ff5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -247,18 +247,6 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) return ip_tunnel_flags_intersect(flags, present); } -static inline void ip_tunnel_set_encflags_present(unsigned long *flags) -{ - IP_TUNNEL_DECLARE_FLAGS(present) = { }; - - __set_bit(IP_TUNNEL_CSUM_BIT, present); - __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present); - __set_bit(IP_TUNNEL_OAM_BIT, present); - __set_bit(IP_TUNNEL_CRIT_OPT_BIT, present); - - ip_tunnel_flags_or(flags, flags, present); -} - static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a0263a4c5489..1a9ca129fddd 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -385,9 +385,7 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS) && - !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_FLAGS)) + FLOW_DISSECTOR_KEY_ENC_OPTS)) return; info = skb_tunnel_info(skb); @@ -489,18 +487,6 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, IP_TUNNEL_GENEVE_OPT_BIT); enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? val : 0; } - - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_FLAGS)) { - struct flow_dissector_key_enc_flags *enc_flags; - IP_TUNNEL_DECLARE_FLAGS(flags) = {}; - - enc_flags = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_FLAGS, - target_container); - ip_tunnel_set_encflags_present(flags); - ip_tunnel_flags_and(flags, flags, info->key.tun_flags); - enc_flags->flags = bitmap_read(flags, IP_TUNNEL_CSUM_BIT, 32); - } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index e2239ab01355..897d6b683cc6 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -85,7 +85,6 @@ struct fl_flow_key { struct flow_dissector_key_l2tpv3 l2tpv3; struct flow_dissector_key_ipsec ipsec; struct flow_dissector_key_cfm cfm; - struct flow_dissector_key_enc_flags enc_flags; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -2223,8 +2222,6 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_IPSEC, ipsec); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CFM, cfm); - FL_KEY_SET_IF_MASKED(mask, keys, cnt, - FLOW_DISSECTOR_KEY_ENC_FLAGS, enc_flags); skb_flow_dissector_init(dissector, keys, cnt); } -- cgit v1.2.3 From c5eaf1b3f824369b6dcb33a39232b871d2ca8e33 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Jul 2024 08:49:12 -0700 Subject: PCI: Add Meta Platforms vendor ID Add Meta as a vendor ID for PCI devices so we can use the macro for future drivers. Signed-off-by: Alexander Duyck Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/172079935272.1778861.13619056509276833225.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Jakub Kicinski --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 677aea20d3e1..76a8f2d6bd64 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2601,6 +2601,8 @@ #define PCI_VENDOR_ID_HYGON 0x1d94 +#define PCI_VENDOR_ID_META 0x1d9b + #define PCI_VENDOR_ID_FUNGIBLE 0x1dad #define PCI_VENDOR_ID_HXT 0x1dbf -- cgit v1.2.3